{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.067133537589362, "eval_steps": 1000, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.171875, "learning_rate": 1.3368983957219251e-07, "loss": 2.8446, "step": 1 }, { "epoch": 0.0, "grad_norm": 3.265625, "learning_rate": 2.6737967914438503e-07, "loss": 2.8493, "step": 2 }, { "epoch": 0.0, "grad_norm": 3.046875, "learning_rate": 4.010695187165776e-07, "loss": 2.8359, "step": 3 }, { "epoch": 0.0, "grad_norm": 3.15625, "learning_rate": 5.347593582887701e-07, "loss": 2.8134, "step": 4 }, { "epoch": 0.0, "grad_norm": 3.125, "learning_rate": 6.684491978609626e-07, "loss": 2.8552, "step": 5 }, { "epoch": 0.0, "grad_norm": 3.078125, "learning_rate": 8.021390374331552e-07, "loss": 2.8164, "step": 6 }, { "epoch": 0.0, "grad_norm": 2.9375, "learning_rate": 9.358288770053476e-07, "loss": 2.8432, "step": 7 }, { "epoch": 0.0, "grad_norm": 3.1875, "learning_rate": 1.0695187165775401e-06, "loss": 2.8493, "step": 8 }, { "epoch": 0.0, "grad_norm": 3.125, "learning_rate": 1.2032085561497326e-06, "loss": 2.848, "step": 9 }, { "epoch": 0.0, "grad_norm": 3.0625, "learning_rate": 1.3368983957219252e-06, "loss": 2.8311, "step": 10 }, { "epoch": 0.0, "grad_norm": 3.0, "learning_rate": 1.4705882352941177e-06, "loss": 2.8485, "step": 11 }, { "epoch": 0.0, "grad_norm": 2.953125, "learning_rate": 1.6042780748663105e-06, "loss": 2.8348, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.09375, "learning_rate": 1.7379679144385028e-06, "loss": 2.8304, "step": 13 }, { "epoch": 0.0, "grad_norm": 3.0625, "learning_rate": 1.8716577540106951e-06, "loss": 2.8923, "step": 14 }, { "epoch": 0.0, "grad_norm": 3.109375, "learning_rate": 2.0053475935828877e-06, "loss": 2.7946, "step": 15 }, { "epoch": 0.0, "grad_norm": 3.0, "learning_rate": 2.1390374331550802e-06, "loss": 2.8179, "step": 16 }, { "epoch": 0.0, "grad_norm": 2.984375, "learning_rate": 2.2727272727272728e-06, "loss": 2.8344, "step": 17 }, { "epoch": 0.0, "grad_norm": 3.0625, "learning_rate": 2.4064171122994653e-06, "loss": 2.833, "step": 18 }, { "epoch": 0.0, "grad_norm": 3.015625, "learning_rate": 2.540106951871658e-06, "loss": 2.8266, "step": 19 }, { "epoch": 0.0, "grad_norm": 3.125, "learning_rate": 2.6737967914438504e-06, "loss": 2.8237, "step": 20 }, { "epoch": 0.0, "grad_norm": 3.0625, "learning_rate": 2.807486631016043e-06, "loss": 2.8598, "step": 21 }, { "epoch": 0.0, "grad_norm": 3.0625, "learning_rate": 2.9411764705882355e-06, "loss": 2.8227, "step": 22 }, { "epoch": 0.0, "grad_norm": 3.0, "learning_rate": 3.074866310160428e-06, "loss": 2.8018, "step": 23 }, { "epoch": 0.0, "grad_norm": 3.09375, "learning_rate": 3.208556149732621e-06, "loss": 2.8525, "step": 24 }, { "epoch": 0.0, "grad_norm": 2.921875, "learning_rate": 3.342245989304813e-06, "loss": 2.8392, "step": 25 }, { "epoch": 0.0, "grad_norm": 2.953125, "learning_rate": 3.4759358288770056e-06, "loss": 2.8455, "step": 26 }, { "epoch": 0.0, "grad_norm": 3.015625, "learning_rate": 3.6096256684491977e-06, "loss": 2.8524, "step": 27 }, { "epoch": 0.0, "grad_norm": 3.03125, "learning_rate": 3.7433155080213903e-06, "loss": 2.8419, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.953125, "learning_rate": 3.877005347593583e-06, "loss": 2.8071, "step": 29 }, { "epoch": 0.0, "grad_norm": 2.953125, "learning_rate": 4.010695187165775e-06, "loss": 2.8521, "step": 30 }, { "epoch": 0.0, "grad_norm": 3.0, "learning_rate": 4.144385026737968e-06, "loss": 2.8692, "step": 31 }, { "epoch": 0.0, "grad_norm": 2.984375, "learning_rate": 4.2780748663101604e-06, "loss": 2.8191, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.84375, "learning_rate": 4.411764705882353e-06, "loss": 2.8433, "step": 33 }, { "epoch": 0.0, "grad_norm": 2.8125, "learning_rate": 4.5454545454545455e-06, "loss": 2.8276, "step": 34 }, { "epoch": 0.0, "grad_norm": 2.8125, "learning_rate": 4.6791443850267385e-06, "loss": 2.8153, "step": 35 }, { "epoch": 0.0, "grad_norm": 2.859375, "learning_rate": 4.812834224598931e-06, "loss": 2.819, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.78125, "learning_rate": 4.9465240641711236e-06, "loss": 2.8313, "step": 37 }, { "epoch": 0.01, "grad_norm": 2.71875, "learning_rate": 5.080213903743316e-06, "loss": 2.805, "step": 38 }, { "epoch": 0.01, "grad_norm": 2.84375, "learning_rate": 5.213903743315509e-06, "loss": 2.82, "step": 39 }, { "epoch": 0.01, "grad_norm": 2.640625, "learning_rate": 5.347593582887701e-06, "loss": 2.7807, "step": 40 }, { "epoch": 0.01, "grad_norm": 2.625, "learning_rate": 5.481283422459893e-06, "loss": 2.7946, "step": 41 }, { "epoch": 0.01, "grad_norm": 2.6875, "learning_rate": 5.614973262032086e-06, "loss": 2.7926, "step": 42 }, { "epoch": 0.01, "grad_norm": 2.609375, "learning_rate": 5.748663101604279e-06, "loss": 2.7963, "step": 43 }, { "epoch": 0.01, "grad_norm": 2.546875, "learning_rate": 5.882352941176471e-06, "loss": 2.8348, "step": 44 }, { "epoch": 0.01, "grad_norm": 2.484375, "learning_rate": 6.016042780748663e-06, "loss": 2.8164, "step": 45 }, { "epoch": 0.01, "grad_norm": 2.421875, "learning_rate": 6.149732620320856e-06, "loss": 2.8046, "step": 46 }, { "epoch": 0.01, "grad_norm": 2.359375, "learning_rate": 6.283422459893049e-06, "loss": 2.8252, "step": 47 }, { "epoch": 0.01, "grad_norm": 2.4375, "learning_rate": 6.417112299465242e-06, "loss": 2.8269, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.328125, "learning_rate": 6.550802139037433e-06, "loss": 2.865, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.34375, "learning_rate": 6.684491978609626e-06, "loss": 2.8301, "step": 50 }, { "epoch": 0.01, "grad_norm": 2.28125, "learning_rate": 6.818181818181818e-06, "loss": 2.8149, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.234375, "learning_rate": 6.951871657754011e-06, "loss": 2.7907, "step": 52 }, { "epoch": 0.01, "grad_norm": 2.1875, "learning_rate": 7.085561497326204e-06, "loss": 2.7999, "step": 53 }, { "epoch": 0.01, "grad_norm": 2.296875, "learning_rate": 7.2192513368983955e-06, "loss": 2.8087, "step": 54 }, { "epoch": 0.01, "grad_norm": 2.109375, "learning_rate": 7.3529411764705884e-06, "loss": 2.8067, "step": 55 }, { "epoch": 0.01, "grad_norm": 2.078125, "learning_rate": 7.4866310160427806e-06, "loss": 2.7903, "step": 56 }, { "epoch": 0.01, "grad_norm": 3.625, "learning_rate": 7.6203208556149735e-06, "loss": 2.8308, "step": 57 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 7.754010695187166e-06, "loss": 2.8285, "step": 58 }, { "epoch": 0.01, "grad_norm": 1.9609375, "learning_rate": 7.887700534759358e-06, "loss": 2.7952, "step": 59 }, { "epoch": 0.01, "grad_norm": 2.046875, "learning_rate": 8.02139037433155e-06, "loss": 2.8352, "step": 60 }, { "epoch": 0.01, "grad_norm": 1.921875, "learning_rate": 8.155080213903744e-06, "loss": 2.8229, "step": 61 }, { "epoch": 0.01, "grad_norm": 1.9140625, "learning_rate": 8.288770053475937e-06, "loss": 2.8107, "step": 62 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 8.42245989304813e-06, "loss": 2.8023, "step": 63 }, { "epoch": 0.01, "grad_norm": 1.8515625, "learning_rate": 8.556149732620321e-06, "loss": 2.791, "step": 64 }, { "epoch": 0.01, "grad_norm": 1.796875, "learning_rate": 8.689839572192514e-06, "loss": 2.7683, "step": 65 }, { "epoch": 0.01, "grad_norm": 1.7578125, "learning_rate": 8.823529411764707e-06, "loss": 2.8053, "step": 66 }, { "epoch": 0.01, "grad_norm": 1.765625, "learning_rate": 8.957219251336898e-06, "loss": 2.7903, "step": 67 }, { "epoch": 0.01, "grad_norm": 1.625, "learning_rate": 9.090909090909091e-06, "loss": 2.8099, "step": 68 }, { "epoch": 0.01, "grad_norm": 1.6796875, "learning_rate": 9.224598930481284e-06, "loss": 2.772, "step": 69 }, { "epoch": 0.01, "grad_norm": 1.5703125, "learning_rate": 9.358288770053477e-06, "loss": 2.7808, "step": 70 }, { "epoch": 0.01, "grad_norm": 1.5390625, "learning_rate": 9.49197860962567e-06, "loss": 2.8032, "step": 71 }, { "epoch": 0.01, "grad_norm": 1.5234375, "learning_rate": 9.625668449197861e-06, "loss": 2.8121, "step": 72 }, { "epoch": 0.01, "grad_norm": 1.5, "learning_rate": 9.759358288770054e-06, "loss": 2.7719, "step": 73 }, { "epoch": 0.01, "grad_norm": 1.4453125, "learning_rate": 9.893048128342247e-06, "loss": 2.8009, "step": 74 }, { "epoch": 0.01, "grad_norm": 1.3984375, "learning_rate": 1.0026737967914438e-05, "loss": 2.7826, "step": 75 }, { "epoch": 0.01, "grad_norm": 1.390625, "learning_rate": 1.0160427807486631e-05, "loss": 2.7934, "step": 76 }, { "epoch": 0.01, "grad_norm": 1.2734375, "learning_rate": 1.0294117647058824e-05, "loss": 2.7849, "step": 77 }, { "epoch": 0.01, "grad_norm": 1.2578125, "learning_rate": 1.0427807486631017e-05, "loss": 2.7943, "step": 78 }, { "epoch": 0.01, "grad_norm": 1.2578125, "learning_rate": 1.056149732620321e-05, "loss": 2.795, "step": 79 }, { "epoch": 0.01, "grad_norm": 1.2265625, "learning_rate": 1.0695187165775402e-05, "loss": 2.7848, "step": 80 }, { "epoch": 0.01, "grad_norm": 1.234375, "learning_rate": 1.0828877005347594e-05, "loss": 2.7753, "step": 81 }, { "epoch": 0.01, "grad_norm": 1.15625, "learning_rate": 1.0962566844919786e-05, "loss": 2.7417, "step": 82 }, { "epoch": 0.01, "grad_norm": 1.125, "learning_rate": 1.1096256684491979e-05, "loss": 2.7568, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.15625, "learning_rate": 1.1229946524064172e-05, "loss": 2.7753, "step": 84 }, { "epoch": 0.01, "grad_norm": 1.0625, "learning_rate": 1.1363636363636365e-05, "loss": 2.7719, "step": 85 }, { "epoch": 0.01, "grad_norm": 1.0546875, "learning_rate": 1.1497326203208558e-05, "loss": 2.7757, "step": 86 }, { "epoch": 0.01, "grad_norm": 1.1171875, "learning_rate": 1.163101604278075e-05, "loss": 2.763, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.0546875, "learning_rate": 1.1764705882352942e-05, "loss": 2.7709, "step": 88 }, { "epoch": 0.01, "grad_norm": 0.9921875, "learning_rate": 1.1898395721925135e-05, "loss": 2.8043, "step": 89 }, { "epoch": 0.01, "grad_norm": 1.015625, "learning_rate": 1.2032085561497326e-05, "loss": 2.7688, "step": 90 }, { "epoch": 0.01, "grad_norm": 0.96875, "learning_rate": 1.2165775401069519e-05, "loss": 2.7447, "step": 91 }, { "epoch": 0.01, "grad_norm": 0.984375, "learning_rate": 1.2299465240641712e-05, "loss": 2.7492, "step": 92 }, { "epoch": 0.01, "grad_norm": 0.94921875, "learning_rate": 1.2433155080213903e-05, "loss": 2.7402, "step": 93 }, { "epoch": 0.01, "grad_norm": 0.93359375, "learning_rate": 1.2566844919786098e-05, "loss": 2.7543, "step": 94 }, { "epoch": 0.01, "grad_norm": 0.94140625, "learning_rate": 1.2700534759358291e-05, "loss": 2.7312, "step": 95 }, { "epoch": 0.01, "grad_norm": 0.91796875, "learning_rate": 1.2834224598930484e-05, "loss": 2.7682, "step": 96 }, { "epoch": 0.01, "grad_norm": 0.87890625, "learning_rate": 1.2967914438502673e-05, "loss": 2.7361, "step": 97 }, { "epoch": 0.01, "grad_norm": 0.8671875, "learning_rate": 1.3101604278074866e-05, "loss": 2.7452, "step": 98 }, { "epoch": 0.01, "grad_norm": 0.8671875, "learning_rate": 1.323529411764706e-05, "loss": 2.7607, "step": 99 }, { "epoch": 0.01, "grad_norm": 0.859375, "learning_rate": 1.3368983957219252e-05, "loss": 2.7384, "step": 100 }, { "epoch": 0.01, "grad_norm": 0.84375, "learning_rate": 1.3502673796791445e-05, "loss": 2.733, "step": 101 }, { "epoch": 0.01, "grad_norm": 0.859375, "learning_rate": 1.3636363636363637e-05, "loss": 2.7066, "step": 102 }, { "epoch": 0.01, "grad_norm": 0.828125, "learning_rate": 1.377005347593583e-05, "loss": 2.7326, "step": 103 }, { "epoch": 0.01, "grad_norm": 0.8046875, "learning_rate": 1.3903743315508022e-05, "loss": 2.7299, "step": 104 }, { "epoch": 0.01, "grad_norm": 0.81640625, "learning_rate": 1.4037433155080215e-05, "loss": 2.743, "step": 105 }, { "epoch": 0.01, "grad_norm": 0.78125, "learning_rate": 1.4171122994652408e-05, "loss": 2.7097, "step": 106 }, { "epoch": 0.01, "grad_norm": 0.78125, "learning_rate": 1.4304812834224598e-05, "loss": 2.722, "step": 107 }, { "epoch": 0.01, "grad_norm": 0.79296875, "learning_rate": 1.4438502673796791e-05, "loss": 2.7625, "step": 108 }, { "epoch": 0.01, "grad_norm": 0.7734375, "learning_rate": 1.4572192513368984e-05, "loss": 2.7212, "step": 109 }, { "epoch": 0.01, "grad_norm": 0.7734375, "learning_rate": 1.4705882352941177e-05, "loss": 2.7268, "step": 110 }, { "epoch": 0.01, "grad_norm": 0.76171875, "learning_rate": 1.4839572192513372e-05, "loss": 2.7135, "step": 111 }, { "epoch": 0.01, "grad_norm": 0.7734375, "learning_rate": 1.4973262032085561e-05, "loss": 2.7579, "step": 112 }, { "epoch": 0.02, "grad_norm": 0.7578125, "learning_rate": 1.5106951871657754e-05, "loss": 2.7141, "step": 113 }, { "epoch": 0.02, "grad_norm": 0.72265625, "learning_rate": 1.5240641711229947e-05, "loss": 2.6927, "step": 114 }, { "epoch": 0.02, "grad_norm": 0.734375, "learning_rate": 1.5374331550802142e-05, "loss": 2.7285, "step": 115 }, { "epoch": 0.02, "grad_norm": 0.734375, "learning_rate": 1.5508021390374333e-05, "loss": 2.7444, "step": 116 }, { "epoch": 0.02, "grad_norm": 0.73046875, "learning_rate": 1.5641711229946524e-05, "loss": 2.7114, "step": 117 }, { "epoch": 0.02, "grad_norm": 0.69921875, "learning_rate": 1.5775401069518716e-05, "loss": 2.7094, "step": 118 }, { "epoch": 0.02, "grad_norm": 0.7265625, "learning_rate": 1.590909090909091e-05, "loss": 2.7294, "step": 119 }, { "epoch": 0.02, "grad_norm": 0.6875, "learning_rate": 1.60427807486631e-05, "loss": 2.7296, "step": 120 }, { "epoch": 0.02, "grad_norm": 0.71484375, "learning_rate": 1.6176470588235296e-05, "loss": 2.7198, "step": 121 }, { "epoch": 0.02, "grad_norm": 0.70703125, "learning_rate": 1.6310160427807487e-05, "loss": 2.7068, "step": 122 }, { "epoch": 0.02, "grad_norm": 0.7109375, "learning_rate": 1.644385026737968e-05, "loss": 2.7302, "step": 123 }, { "epoch": 0.02, "grad_norm": 0.67578125, "learning_rate": 1.6577540106951873e-05, "loss": 2.6763, "step": 124 }, { "epoch": 0.02, "grad_norm": 0.68359375, "learning_rate": 1.6711229946524065e-05, "loss": 2.7112, "step": 125 }, { "epoch": 0.02, "grad_norm": 0.69140625, "learning_rate": 1.684491978609626e-05, "loss": 2.718, "step": 126 }, { "epoch": 0.02, "grad_norm": 0.6328125, "learning_rate": 1.697860962566845e-05, "loss": 2.7131, "step": 127 }, { "epoch": 0.02, "grad_norm": 0.65625, "learning_rate": 1.7112299465240642e-05, "loss": 2.719, "step": 128 }, { "epoch": 0.02, "grad_norm": 0.65625, "learning_rate": 1.7245989304812833e-05, "loss": 2.7049, "step": 129 }, { "epoch": 0.02, "grad_norm": 0.66796875, "learning_rate": 1.7379679144385028e-05, "loss": 2.6974, "step": 130 }, { "epoch": 0.02, "grad_norm": 0.6328125, "learning_rate": 1.7513368983957222e-05, "loss": 2.7404, "step": 131 }, { "epoch": 0.02, "grad_norm": 0.62890625, "learning_rate": 1.7647058823529414e-05, "loss": 2.7033, "step": 132 }, { "epoch": 0.02, "grad_norm": 0.65234375, "learning_rate": 1.7780748663101605e-05, "loss": 2.6945, "step": 133 }, { "epoch": 0.02, "grad_norm": 0.6171875, "learning_rate": 1.7914438502673796e-05, "loss": 2.6939, "step": 134 }, { "epoch": 0.02, "grad_norm": 0.60546875, "learning_rate": 1.804812834224599e-05, "loss": 2.6933, "step": 135 }, { "epoch": 0.02, "grad_norm": 0.625, "learning_rate": 1.8181818181818182e-05, "loss": 2.6897, "step": 136 }, { "epoch": 0.02, "grad_norm": 0.62890625, "learning_rate": 1.8315508021390377e-05, "loss": 2.6856, "step": 137 }, { "epoch": 0.02, "grad_norm": 0.6171875, "learning_rate": 1.8449197860962568e-05, "loss": 2.6909, "step": 138 }, { "epoch": 0.02, "grad_norm": 0.59375, "learning_rate": 1.858288770053476e-05, "loss": 2.7113, "step": 139 }, { "epoch": 0.02, "grad_norm": 0.59765625, "learning_rate": 1.8716577540106954e-05, "loss": 2.6805, "step": 140 }, { "epoch": 0.02, "grad_norm": 0.578125, "learning_rate": 1.8850267379679145e-05, "loss": 2.682, "step": 141 }, { "epoch": 0.02, "grad_norm": 0.59765625, "learning_rate": 1.898395721925134e-05, "loss": 2.6746, "step": 142 }, { "epoch": 0.02, "grad_norm": 0.6015625, "learning_rate": 1.9117647058823528e-05, "loss": 2.6806, "step": 143 }, { "epoch": 0.02, "grad_norm": 0.59765625, "learning_rate": 1.9251336898395722e-05, "loss": 2.6871, "step": 144 }, { "epoch": 0.02, "grad_norm": 0.58984375, "learning_rate": 1.9385026737967914e-05, "loss": 2.6993, "step": 145 }, { "epoch": 0.02, "grad_norm": 0.6015625, "learning_rate": 1.951871657754011e-05, "loss": 2.7123, "step": 146 }, { "epoch": 0.02, "grad_norm": 0.55859375, "learning_rate": 1.96524064171123e-05, "loss": 2.6924, "step": 147 }, { "epoch": 0.02, "grad_norm": 0.578125, "learning_rate": 1.9786096256684494e-05, "loss": 2.6772, "step": 148 }, { "epoch": 0.02, "grad_norm": 0.55078125, "learning_rate": 1.9919786096256686e-05, "loss": 2.6593, "step": 149 }, { "epoch": 0.02, "grad_norm": 0.58984375, "learning_rate": 2.0053475935828877e-05, "loss": 2.6808, "step": 150 }, { "epoch": 0.02, "grad_norm": 0.55078125, "learning_rate": 2.018716577540107e-05, "loss": 2.6685, "step": 151 }, { "epoch": 0.02, "grad_norm": 0.58203125, "learning_rate": 2.0320855614973263e-05, "loss": 2.6675, "step": 152 }, { "epoch": 0.02, "grad_norm": 0.5703125, "learning_rate": 2.0454545454545457e-05, "loss": 2.6541, "step": 153 }, { "epoch": 0.02, "grad_norm": 0.55078125, "learning_rate": 2.058823529411765e-05, "loss": 2.6557, "step": 154 }, { "epoch": 0.02, "grad_norm": 0.56640625, "learning_rate": 2.072192513368984e-05, "loss": 2.6638, "step": 155 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 2.0855614973262035e-05, "loss": 2.6703, "step": 156 }, { "epoch": 0.02, "grad_norm": 0.55859375, "learning_rate": 2.0989304812834226e-05, "loss": 2.6515, "step": 157 }, { "epoch": 0.02, "grad_norm": 0.546875, "learning_rate": 2.112299465240642e-05, "loss": 2.6727, "step": 158 }, { "epoch": 0.02, "grad_norm": 0.55859375, "learning_rate": 2.125668449197861e-05, "loss": 2.6852, "step": 159 }, { "epoch": 0.02, "grad_norm": 0.5703125, "learning_rate": 2.1390374331550803e-05, "loss": 2.7048, "step": 160 }, { "epoch": 0.02, "grad_norm": 0.56640625, "learning_rate": 2.1524064171122994e-05, "loss": 2.7107, "step": 161 }, { "epoch": 0.02, "grad_norm": 0.5625, "learning_rate": 2.165775401069519e-05, "loss": 2.6781, "step": 162 }, { "epoch": 0.02, "grad_norm": 0.56640625, "learning_rate": 2.179144385026738e-05, "loss": 2.6619, "step": 163 }, { "epoch": 0.02, "grad_norm": 0.546875, "learning_rate": 2.192513368983957e-05, "loss": 2.6766, "step": 164 }, { "epoch": 0.02, "grad_norm": 0.53515625, "learning_rate": 2.2058823529411766e-05, "loss": 2.6516, "step": 165 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 2.2192513368983957e-05, "loss": 2.6481, "step": 166 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.2326203208556152e-05, "loss": 2.6768, "step": 167 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.2459893048128343e-05, "loss": 2.6542, "step": 168 }, { "epoch": 0.02, "grad_norm": 0.5390625, "learning_rate": 2.2593582887700535e-05, "loss": 2.6645, "step": 169 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 2.272727272727273e-05, "loss": 2.6719, "step": 170 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 2.286096256684492e-05, "loss": 2.6886, "step": 171 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 2.2994652406417115e-05, "loss": 2.651, "step": 172 }, { "epoch": 0.02, "grad_norm": 0.51953125, "learning_rate": 2.3128342245989306e-05, "loss": 2.6548, "step": 173 }, { "epoch": 0.02, "grad_norm": 0.50390625, "learning_rate": 2.32620320855615e-05, "loss": 2.6505, "step": 174 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 2.339572192513369e-05, "loss": 2.651, "step": 175 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.3529411764705884e-05, "loss": 2.664, "step": 176 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.3663101604278075e-05, "loss": 2.6166, "step": 177 }, { "epoch": 0.02, "grad_norm": 0.53125, "learning_rate": 2.379679144385027e-05, "loss": 2.6526, "step": 178 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 2.393048128342246e-05, "loss": 2.6636, "step": 179 }, { "epoch": 0.02, "grad_norm": 0.52734375, "learning_rate": 2.4064171122994652e-05, "loss": 2.6695, "step": 180 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 2.4197860962566847e-05, "loss": 2.6411, "step": 181 }, { "epoch": 0.02, "grad_norm": 0.54296875, "learning_rate": 2.4331550802139038e-05, "loss": 2.658, "step": 182 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.4465240641711233e-05, "loss": 2.6724, "step": 183 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.4598930481283424e-05, "loss": 2.6544, "step": 184 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.4732620320855615e-05, "loss": 2.6497, "step": 185 }, { "epoch": 0.02, "grad_norm": 0.5234375, "learning_rate": 2.4866310160427807e-05, "loss": 2.6245, "step": 186 }, { "epoch": 0.02, "grad_norm": 0.51171875, "learning_rate": 2.5e-05, "loss": 2.6247, "step": 187 }, { "epoch": 0.03, "grad_norm": 0.482421875, "learning_rate": 2.5133689839572196e-05, "loss": 2.6082, "step": 188 }, { "epoch": 0.03, "grad_norm": 0.5, "learning_rate": 2.5267379679144387e-05, "loss": 2.6546, "step": 189 }, { "epoch": 0.03, "grad_norm": 0.51171875, "learning_rate": 2.5401069518716582e-05, "loss": 2.6732, "step": 190 }, { "epoch": 0.03, "grad_norm": 0.5, "learning_rate": 2.5534759358288773e-05, "loss": 2.6393, "step": 191 }, { "epoch": 0.03, "grad_norm": 0.486328125, "learning_rate": 2.5668449197860968e-05, "loss": 2.5856, "step": 192 }, { "epoch": 0.03, "grad_norm": 0.515625, "learning_rate": 2.5802139037433156e-05, "loss": 2.5996, "step": 193 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 2.5935828877005347e-05, "loss": 2.6519, "step": 194 }, { "epoch": 0.03, "grad_norm": 0.49609375, "learning_rate": 2.606951871657754e-05, "loss": 2.6361, "step": 195 }, { "epoch": 0.03, "grad_norm": 0.50390625, "learning_rate": 2.6203208556149733e-05, "loss": 2.6638, "step": 196 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 2.6336898395721927e-05, "loss": 2.6092, "step": 197 }, { "epoch": 0.03, "grad_norm": 0.48046875, "learning_rate": 2.647058823529412e-05, "loss": 2.6256, "step": 198 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 2.6604278074866313e-05, "loss": 2.6127, "step": 199 }, { "epoch": 0.03, "grad_norm": 0.51953125, "learning_rate": 2.6737967914438505e-05, "loss": 2.5827, "step": 200 }, { "epoch": 0.03, "grad_norm": 0.5, "learning_rate": 2.68716577540107e-05, "loss": 2.6488, "step": 201 }, { "epoch": 0.03, "grad_norm": 0.484375, "learning_rate": 2.700534759358289e-05, "loss": 2.6092, "step": 202 }, { "epoch": 0.03, "grad_norm": 0.5078125, "learning_rate": 2.713903743315508e-05, "loss": 2.5786, "step": 203 }, { "epoch": 0.03, "grad_norm": 0.490234375, "learning_rate": 2.7272727272727273e-05, "loss": 2.63, "step": 204 }, { "epoch": 0.03, "grad_norm": 0.474609375, "learning_rate": 2.7406417112299464e-05, "loss": 2.6367, "step": 205 }, { "epoch": 0.03, "grad_norm": 0.478515625, "learning_rate": 2.754010695187166e-05, "loss": 2.6189, "step": 206 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 2.767379679144385e-05, "loss": 2.6089, "step": 207 }, { "epoch": 0.03, "grad_norm": 0.482421875, "learning_rate": 2.7807486631016045e-05, "loss": 2.6392, "step": 208 }, { "epoch": 0.03, "grad_norm": 0.46484375, "learning_rate": 2.7941176470588236e-05, "loss": 2.6111, "step": 209 }, { "epoch": 0.03, "grad_norm": 0.466796875, "learning_rate": 2.807486631016043e-05, "loss": 2.6187, "step": 210 }, { "epoch": 0.03, "grad_norm": 0.44921875, "learning_rate": 2.8208556149732622e-05, "loss": 2.5909, "step": 211 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 2.8342245989304817e-05, "loss": 2.6312, "step": 212 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 2.8475935828877005e-05, "loss": 2.6422, "step": 213 }, { "epoch": 0.03, "grad_norm": 0.44140625, "learning_rate": 2.8609625668449196e-05, "loss": 2.6055, "step": 214 }, { "epoch": 0.03, "grad_norm": 0.451171875, "learning_rate": 2.874331550802139e-05, "loss": 2.6302, "step": 215 }, { "epoch": 0.03, "grad_norm": 0.43359375, "learning_rate": 2.8877005347593582e-05, "loss": 2.6274, "step": 216 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 2.9010695187165777e-05, "loss": 2.6186, "step": 217 }, { "epoch": 0.03, "grad_norm": 0.462890625, "learning_rate": 2.9144385026737968e-05, "loss": 2.648, "step": 218 }, { "epoch": 0.03, "grad_norm": 0.431640625, "learning_rate": 2.9278074866310162e-05, "loss": 2.6071, "step": 219 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 2.9411764705882354e-05, "loss": 2.6294, "step": 220 }, { "epoch": 0.03, "grad_norm": 0.4453125, "learning_rate": 2.954545454545455e-05, "loss": 2.5918, "step": 221 }, { "epoch": 0.03, "grad_norm": 0.455078125, "learning_rate": 2.9679144385026743e-05, "loss": 2.5963, "step": 222 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 2.9812834224598934e-05, "loss": 2.618, "step": 223 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 2.9946524064171122e-05, "loss": 2.591, "step": 224 }, { "epoch": 0.03, "grad_norm": 0.4609375, "learning_rate": 3.0080213903743313e-05, "loss": 2.6231, "step": 225 }, { "epoch": 0.03, "grad_norm": 0.439453125, "learning_rate": 3.0213903743315508e-05, "loss": 2.5979, "step": 226 }, { "epoch": 0.03, "grad_norm": 0.431640625, "learning_rate": 3.0347593582887703e-05, "loss": 2.5569, "step": 227 }, { "epoch": 0.03, "grad_norm": 0.435546875, "learning_rate": 3.0481283422459894e-05, "loss": 2.5779, "step": 228 }, { "epoch": 0.03, "grad_norm": 0.447265625, "learning_rate": 3.0614973262032085e-05, "loss": 2.5872, "step": 229 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.0748663101604283e-05, "loss": 2.5871, "step": 230 }, { "epoch": 0.03, "grad_norm": 0.4375, "learning_rate": 3.0882352941176475e-05, "loss": 2.5723, "step": 231 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.1016042780748666e-05, "loss": 2.604, "step": 232 }, { "epoch": 0.03, "grad_norm": 0.4296875, "learning_rate": 3.114973262032086e-05, "loss": 2.6038, "step": 233 }, { "epoch": 0.03, "grad_norm": 0.44140625, "learning_rate": 3.128342245989305e-05, "loss": 2.6056, "step": 234 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.141711229946524e-05, "loss": 2.5972, "step": 235 }, { "epoch": 0.03, "grad_norm": 0.416015625, "learning_rate": 3.155080213903743e-05, "loss": 2.5937, "step": 236 }, { "epoch": 0.03, "grad_norm": 0.421875, "learning_rate": 3.168449197860963e-05, "loss": 2.5928, "step": 237 }, { "epoch": 0.03, "grad_norm": 0.4140625, "learning_rate": 3.181818181818182e-05, "loss": 2.5938, "step": 238 }, { "epoch": 0.03, "grad_norm": 0.42578125, "learning_rate": 3.195187165775401e-05, "loss": 2.5988, "step": 239 }, { "epoch": 0.03, "grad_norm": 0.419921875, "learning_rate": 3.20855614973262e-05, "loss": 2.5783, "step": 240 }, { "epoch": 0.03, "grad_norm": 0.421875, "learning_rate": 3.22192513368984e-05, "loss": 2.5678, "step": 241 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.235294117647059e-05, "loss": 2.5753, "step": 242 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.2486631016042783e-05, "loss": 2.6086, "step": 243 }, { "epoch": 0.03, "grad_norm": 0.396484375, "learning_rate": 3.2620320855614975e-05, "loss": 2.5772, "step": 244 }, { "epoch": 0.03, "grad_norm": 0.39453125, "learning_rate": 3.2754010695187166e-05, "loss": 2.5783, "step": 245 }, { "epoch": 0.03, "grad_norm": 0.412109375, "learning_rate": 3.288770053475936e-05, "loss": 2.5984, "step": 246 }, { "epoch": 0.03, "grad_norm": 0.412109375, "learning_rate": 3.302139037433155e-05, "loss": 2.5836, "step": 247 }, { "epoch": 0.03, "grad_norm": 0.396484375, "learning_rate": 3.3155080213903747e-05, "loss": 2.5638, "step": 248 }, { "epoch": 0.03, "grad_norm": 0.42578125, "learning_rate": 3.328877005347594e-05, "loss": 2.5746, "step": 249 }, { "epoch": 0.03, "grad_norm": 0.400390625, "learning_rate": 3.342245989304813e-05, "loss": 2.5894, "step": 250 }, { "epoch": 0.03, "grad_norm": 0.41796875, "learning_rate": 3.355614973262032e-05, "loss": 2.589, "step": 251 }, { "epoch": 0.03, "grad_norm": 0.416015625, "learning_rate": 3.368983957219252e-05, "loss": 2.5703, "step": 252 }, { "epoch": 0.03, "grad_norm": 0.40625, "learning_rate": 3.382352941176471e-05, "loss": 2.572, "step": 253 }, { "epoch": 0.03, "grad_norm": 0.3984375, "learning_rate": 3.39572192513369e-05, "loss": 2.5774, "step": 254 }, { "epoch": 0.03, "grad_norm": 0.419921875, "learning_rate": 3.409090909090909e-05, "loss": 2.5897, "step": 255 }, { "epoch": 0.03, "grad_norm": 0.400390625, "learning_rate": 3.4224598930481284e-05, "loss": 2.5954, "step": 256 }, { "epoch": 0.03, "grad_norm": 0.380859375, "learning_rate": 3.4358288770053475e-05, "loss": 2.549, "step": 257 }, { "epoch": 0.03, "grad_norm": 0.39453125, "learning_rate": 3.4491978609625666e-05, "loss": 2.5745, "step": 258 }, { "epoch": 0.03, "grad_norm": 0.404296875, "learning_rate": 3.4625668449197864e-05, "loss": 2.5559, "step": 259 }, { "epoch": 0.03, "grad_norm": 0.40234375, "learning_rate": 3.4759358288770055e-05, "loss": 2.5696, "step": 260 }, { "epoch": 0.03, "grad_norm": 0.40234375, "learning_rate": 3.489304812834225e-05, "loss": 2.57, "step": 261 }, { "epoch": 0.03, "grad_norm": 0.376953125, "learning_rate": 3.5026737967914445e-05, "loss": 2.5833, "step": 262 }, { "epoch": 0.04, "grad_norm": 0.3828125, "learning_rate": 3.5160427807486636e-05, "loss": 2.5694, "step": 263 }, { "epoch": 0.04, "grad_norm": 0.392578125, "learning_rate": 3.529411764705883e-05, "loss": 2.5496, "step": 264 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 3.542780748663101e-05, "loss": 2.5559, "step": 265 }, { "epoch": 0.04, "grad_norm": 0.396484375, "learning_rate": 3.556149732620321e-05, "loss": 2.5499, "step": 266 }, { "epoch": 0.04, "grad_norm": 0.37890625, "learning_rate": 3.56951871657754e-05, "loss": 2.5744, "step": 267 }, { "epoch": 0.04, "grad_norm": 0.388671875, "learning_rate": 3.582887700534759e-05, "loss": 2.5782, "step": 268 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 3.596256684491979e-05, "loss": 2.5623, "step": 269 }, { "epoch": 0.04, "grad_norm": 0.3671875, "learning_rate": 3.609625668449198e-05, "loss": 2.5659, "step": 270 }, { "epoch": 0.04, "grad_norm": 0.3984375, "learning_rate": 3.622994652406417e-05, "loss": 2.5755, "step": 271 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 3.6363636363636364e-05, "loss": 2.5768, "step": 272 }, { "epoch": 0.04, "grad_norm": 0.384765625, "learning_rate": 3.649732620320856e-05, "loss": 2.5868, "step": 273 }, { "epoch": 0.04, "grad_norm": 0.37890625, "learning_rate": 3.6631016042780753e-05, "loss": 2.554, "step": 274 }, { "epoch": 0.04, "grad_norm": 0.380859375, "learning_rate": 3.6764705882352945e-05, "loss": 2.5636, "step": 275 }, { "epoch": 0.04, "grad_norm": 0.384765625, "learning_rate": 3.6898395721925136e-05, "loss": 2.5637, "step": 276 }, { "epoch": 0.04, "grad_norm": 0.375, "learning_rate": 3.703208556149733e-05, "loss": 2.5663, "step": 277 }, { "epoch": 0.04, "grad_norm": 0.40234375, "learning_rate": 3.716577540106952e-05, "loss": 2.5713, "step": 278 }, { "epoch": 0.04, "grad_norm": 0.419921875, "learning_rate": 3.729946524064171e-05, "loss": 2.5874, "step": 279 }, { "epoch": 0.04, "grad_norm": 0.380859375, "learning_rate": 3.743315508021391e-05, "loss": 2.5601, "step": 280 }, { "epoch": 0.04, "grad_norm": 0.3828125, "learning_rate": 3.75668449197861e-05, "loss": 2.5446, "step": 281 }, { "epoch": 0.04, "grad_norm": 0.38671875, "learning_rate": 3.770053475935829e-05, "loss": 2.5486, "step": 282 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 3.783422459893048e-05, "loss": 2.5568, "step": 283 }, { "epoch": 0.04, "grad_norm": 0.375, "learning_rate": 3.796791443850268e-05, "loss": 2.5515, "step": 284 }, { "epoch": 0.04, "grad_norm": 0.373046875, "learning_rate": 3.810160427807487e-05, "loss": 2.5209, "step": 285 }, { "epoch": 0.04, "grad_norm": 0.37890625, "learning_rate": 3.8235294117647055e-05, "loss": 2.5596, "step": 286 }, { "epoch": 0.04, "grad_norm": 0.40234375, "learning_rate": 3.8368983957219254e-05, "loss": 2.5518, "step": 287 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 3.8502673796791445e-05, "loss": 2.5599, "step": 288 }, { "epoch": 0.04, "grad_norm": 0.384765625, "learning_rate": 3.8636363636363636e-05, "loss": 2.5363, "step": 289 }, { "epoch": 0.04, "grad_norm": 0.404296875, "learning_rate": 3.877005347593583e-05, "loss": 2.5655, "step": 290 }, { "epoch": 0.04, "grad_norm": 0.388671875, "learning_rate": 3.8903743315508025e-05, "loss": 2.5775, "step": 291 }, { "epoch": 0.04, "grad_norm": 0.388671875, "learning_rate": 3.903743315508022e-05, "loss": 2.5595, "step": 292 }, { "epoch": 0.04, "grad_norm": 0.41015625, "learning_rate": 3.917112299465241e-05, "loss": 2.5201, "step": 293 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 3.93048128342246e-05, "loss": 2.5386, "step": 294 }, { "epoch": 0.04, "grad_norm": 0.359375, "learning_rate": 3.94385026737968e-05, "loss": 2.5259, "step": 295 }, { "epoch": 0.04, "grad_norm": 0.373046875, "learning_rate": 3.957219251336899e-05, "loss": 2.519, "step": 296 }, { "epoch": 0.04, "grad_norm": 0.373046875, "learning_rate": 3.970588235294117e-05, "loss": 2.5493, "step": 297 }, { "epoch": 0.04, "grad_norm": 0.38671875, "learning_rate": 3.983957219251337e-05, "loss": 2.5004, "step": 298 }, { "epoch": 0.04, "grad_norm": 0.37890625, "learning_rate": 3.997326203208556e-05, "loss": 2.5466, "step": 299 }, { "epoch": 0.04, "grad_norm": 0.353515625, "learning_rate": 4.0106951871657754e-05, "loss": 2.524, "step": 300 }, { "epoch": 0.04, "grad_norm": 0.33984375, "learning_rate": 4.024064171122995e-05, "loss": 2.5113, "step": 301 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 4.037433155080214e-05, "loss": 2.555, "step": 302 }, { "epoch": 0.04, "grad_norm": 0.375, "learning_rate": 4.0508021390374334e-05, "loss": 2.5042, "step": 303 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 4.0641711229946525e-05, "loss": 2.5205, "step": 304 }, { "epoch": 0.04, "grad_norm": 0.3515625, "learning_rate": 4.0775401069518723e-05, "loss": 2.5058, "step": 305 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 4.0909090909090915e-05, "loss": 2.545, "step": 306 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 4.10427807486631e-05, "loss": 2.5611, "step": 307 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 4.11764705882353e-05, "loss": 2.5244, "step": 308 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.131016042780749e-05, "loss": 2.5524, "step": 309 }, { "epoch": 0.04, "grad_norm": 0.361328125, "learning_rate": 4.144385026737968e-05, "loss": 2.5446, "step": 310 }, { "epoch": 0.04, "grad_norm": 0.369140625, "learning_rate": 4.157754010695187e-05, "loss": 2.5356, "step": 311 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 4.171122994652407e-05, "loss": 2.516, "step": 312 }, { "epoch": 0.04, "grad_norm": 0.361328125, "learning_rate": 4.184491978609626e-05, "loss": 2.5543, "step": 313 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 4.197860962566845e-05, "loss": 2.5427, "step": 314 }, { "epoch": 0.04, "grad_norm": 0.34375, "learning_rate": 4.211229946524064e-05, "loss": 2.5231, "step": 315 }, { "epoch": 0.04, "grad_norm": 0.37109375, "learning_rate": 4.224598930481284e-05, "loss": 2.5181, "step": 316 }, { "epoch": 0.04, "grad_norm": 0.376953125, "learning_rate": 4.2379679144385025e-05, "loss": 2.5111, "step": 317 }, { "epoch": 0.04, "grad_norm": 0.328125, "learning_rate": 4.251336898395722e-05, "loss": 2.5155, "step": 318 }, { "epoch": 0.04, "grad_norm": 0.349609375, "learning_rate": 4.2647058823529415e-05, "loss": 2.5158, "step": 319 }, { "epoch": 0.04, "grad_norm": 0.3671875, "learning_rate": 4.2780748663101606e-05, "loss": 2.5442, "step": 320 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.29144385026738e-05, "loss": 2.5172, "step": 321 }, { "epoch": 0.04, "grad_norm": 0.373046875, "learning_rate": 4.304812834224599e-05, "loss": 2.5107, "step": 322 }, { "epoch": 0.04, "grad_norm": 0.365234375, "learning_rate": 4.318181818181819e-05, "loss": 2.5005, "step": 323 }, { "epoch": 0.04, "grad_norm": 0.3671875, "learning_rate": 4.331550802139038e-05, "loss": 2.5201, "step": 324 }, { "epoch": 0.04, "grad_norm": 0.33984375, "learning_rate": 4.344919786096257e-05, "loss": 2.5317, "step": 325 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.358288770053476e-05, "loss": 2.5028, "step": 326 }, { "epoch": 0.04, "grad_norm": 0.35546875, "learning_rate": 4.371657754010696e-05, "loss": 2.5216, "step": 327 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.385026737967914e-05, "loss": 2.522, "step": 328 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.3983957219251334e-05, "loss": 2.4726, "step": 329 }, { "epoch": 0.04, "grad_norm": 0.34375, "learning_rate": 4.411764705882353e-05, "loss": 2.4739, "step": 330 }, { "epoch": 0.04, "grad_norm": 0.384765625, "learning_rate": 4.4251336898395724e-05, "loss": 2.5059, "step": 331 }, { "epoch": 0.04, "grad_norm": 0.3828125, "learning_rate": 4.4385026737967915e-05, "loss": 2.5206, "step": 332 }, { "epoch": 0.04, "grad_norm": 0.3828125, "learning_rate": 4.4518716577540106e-05, "loss": 2.5092, "step": 333 }, { "epoch": 0.04, "grad_norm": 0.359375, "learning_rate": 4.4652406417112304e-05, "loss": 2.4801, "step": 334 }, { "epoch": 0.04, "grad_norm": 0.36328125, "learning_rate": 4.4786096256684495e-05, "loss": 2.5102, "step": 335 }, { "epoch": 0.04, "grad_norm": 0.373046875, "learning_rate": 4.491978609625669e-05, "loss": 2.4842, "step": 336 }, { "epoch": 0.04, "grad_norm": 0.3515625, "learning_rate": 4.5053475935828885e-05, "loss": 2.5131, "step": 337 }, { "epoch": 0.05, "grad_norm": 0.34375, "learning_rate": 4.518716577540107e-05, "loss": 2.5179, "step": 338 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.532085561497326e-05, "loss": 2.4853, "step": 339 }, { "epoch": 0.05, "grad_norm": 0.357421875, "learning_rate": 4.545454545454546e-05, "loss": 2.4993, "step": 340 }, { "epoch": 0.05, "grad_norm": 0.37109375, "learning_rate": 4.558823529411765e-05, "loss": 2.5318, "step": 341 }, { "epoch": 0.05, "grad_norm": 0.353515625, "learning_rate": 4.572192513368984e-05, "loss": 2.4857, "step": 342 }, { "epoch": 0.05, "grad_norm": 0.36328125, "learning_rate": 4.585561497326203e-05, "loss": 2.4899, "step": 343 }, { "epoch": 0.05, "grad_norm": 0.376953125, "learning_rate": 4.598930481283423e-05, "loss": 2.4736, "step": 344 }, { "epoch": 0.05, "grad_norm": 0.3671875, "learning_rate": 4.612299465240642e-05, "loss": 2.4819, "step": 345 }, { "epoch": 0.05, "grad_norm": 0.341796875, "learning_rate": 4.625668449197861e-05, "loss": 2.4824, "step": 346 }, { "epoch": 0.05, "grad_norm": 0.359375, "learning_rate": 4.6390374331550804e-05, "loss": 2.5004, "step": 347 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.6524064171123e-05, "loss": 2.4688, "step": 348 }, { "epoch": 0.05, "grad_norm": 0.326171875, "learning_rate": 4.665775401069519e-05, "loss": 2.4953, "step": 349 }, { "epoch": 0.05, "grad_norm": 0.33984375, "learning_rate": 4.679144385026738e-05, "loss": 2.509, "step": 350 }, { "epoch": 0.05, "grad_norm": 0.34765625, "learning_rate": 4.6925133689839576e-05, "loss": 2.5042, "step": 351 }, { "epoch": 0.05, "grad_norm": 0.359375, "learning_rate": 4.705882352941177e-05, "loss": 2.5208, "step": 352 }, { "epoch": 0.05, "grad_norm": 0.333984375, "learning_rate": 4.719251336898396e-05, "loss": 2.479, "step": 353 }, { "epoch": 0.05, "grad_norm": 0.3203125, "learning_rate": 4.732620320855615e-05, "loss": 2.4838, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.326171875, "learning_rate": 4.745989304812835e-05, "loss": 2.4859, "step": 355 }, { "epoch": 0.05, "grad_norm": 0.37109375, "learning_rate": 4.759358288770054e-05, "loss": 2.4843, "step": 356 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.772727272727273e-05, "loss": 2.5314, "step": 357 }, { "epoch": 0.05, "grad_norm": 0.35546875, "learning_rate": 4.786096256684492e-05, "loss": 2.5019, "step": 358 }, { "epoch": 0.05, "grad_norm": 0.314453125, "learning_rate": 4.799465240641711e-05, "loss": 2.4592, "step": 359 }, { "epoch": 0.05, "grad_norm": 0.341796875, "learning_rate": 4.8128342245989304e-05, "loss": 2.4865, "step": 360 }, { "epoch": 0.05, "grad_norm": 0.326171875, "learning_rate": 4.8262032085561496e-05, "loss": 2.518, "step": 361 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.8395721925133694e-05, "loss": 2.4979, "step": 362 }, { "epoch": 0.05, "grad_norm": 0.349609375, "learning_rate": 4.8529411764705885e-05, "loss": 2.4983, "step": 363 }, { "epoch": 0.05, "grad_norm": 0.359375, "learning_rate": 4.8663101604278076e-05, "loss": 2.5184, "step": 364 }, { "epoch": 0.05, "grad_norm": 0.328125, "learning_rate": 4.879679144385027e-05, "loss": 2.4911, "step": 365 }, { "epoch": 0.05, "grad_norm": 0.32421875, "learning_rate": 4.8930481283422465e-05, "loss": 2.5068, "step": 366 }, { "epoch": 0.05, "grad_norm": 0.318359375, "learning_rate": 4.906417112299466e-05, "loss": 2.4608, "step": 367 }, { "epoch": 0.05, "grad_norm": 0.314453125, "learning_rate": 4.919786096256685e-05, "loss": 2.4948, "step": 368 }, { "epoch": 0.05, "grad_norm": 0.33203125, "learning_rate": 4.933155080213904e-05, "loss": 2.4845, "step": 369 }, { "epoch": 0.05, "grad_norm": 0.306640625, "learning_rate": 4.946524064171123e-05, "loss": 2.4743, "step": 370 }, { "epoch": 0.05, "grad_norm": 0.32421875, "learning_rate": 4.959893048128342e-05, "loss": 2.5084, "step": 371 }, { "epoch": 0.05, "grad_norm": 0.373046875, "learning_rate": 4.973262032085561e-05, "loss": 2.5062, "step": 372 }, { "epoch": 0.05, "grad_norm": 0.33984375, "learning_rate": 4.986631016042781e-05, "loss": 2.4817, "step": 373 }, { "epoch": 0.05, "grad_norm": 0.333984375, "learning_rate": 5e-05, "loss": 2.4913, "step": 374 }, { "epoch": 0.05, "grad_norm": 0.3203125, "learning_rate": 4.999999991039722e-05, "loss": 2.4327, "step": 375 }, { "epoch": 0.05, "grad_norm": 0.3359375, "learning_rate": 4.9999999641588893e-05, "loss": 2.4936, "step": 376 }, { "epoch": 0.05, "grad_norm": 0.34765625, "learning_rate": 4.9999999193575e-05, "loss": 2.4846, "step": 377 }, { "epoch": 0.05, "grad_norm": 0.3203125, "learning_rate": 4.999999856635555e-05, "loss": 2.482, "step": 378 }, { "epoch": 0.05, "grad_norm": 0.376953125, "learning_rate": 4.999999775993057e-05, "loss": 2.4774, "step": 379 }, { "epoch": 0.05, "grad_norm": 0.341796875, "learning_rate": 4.999999677430004e-05, "loss": 2.4659, "step": 380 }, { "epoch": 0.05, "grad_norm": 0.36328125, "learning_rate": 4.999999560946397e-05, "loss": 2.4643, "step": 381 }, { "epoch": 0.05, "grad_norm": 0.328125, "learning_rate": 4.999999426542238e-05, "loss": 2.4661, "step": 382 }, { "epoch": 0.05, "grad_norm": 0.3828125, "learning_rate": 4.9999992742175277e-05, "loss": 2.4475, "step": 383 }, { "epoch": 0.05, "grad_norm": 0.353515625, "learning_rate": 4.999999103972267e-05, "loss": 2.4977, "step": 384 }, { "epoch": 0.05, "grad_norm": 0.314453125, "learning_rate": 4.9999989158064566e-05, "loss": 2.4937, "step": 385 }, { "epoch": 0.05, "grad_norm": 0.33203125, "learning_rate": 4.999998709720098e-05, "loss": 2.4578, "step": 386 }, { "epoch": 0.05, "grad_norm": 0.328125, "learning_rate": 4.999998485713193e-05, "loss": 2.4866, "step": 387 }, { "epoch": 0.05, "grad_norm": 0.322265625, "learning_rate": 4.999998243785743e-05, "loss": 2.4993, "step": 388 }, { "epoch": 0.05, "grad_norm": 0.361328125, "learning_rate": 4.99999798393775e-05, "loss": 2.4736, "step": 389 }, { "epoch": 0.05, "grad_norm": 0.31640625, "learning_rate": 4.999997706169216e-05, "loss": 2.4772, "step": 390 }, { "epoch": 0.05, "grad_norm": 0.322265625, "learning_rate": 4.9999974104801426e-05, "loss": 2.4251, "step": 391 }, { "epoch": 0.05, "grad_norm": 0.32421875, "learning_rate": 4.999997096870532e-05, "loss": 2.4794, "step": 392 }, { "epoch": 0.05, "grad_norm": 0.333984375, "learning_rate": 4.999996765340387e-05, "loss": 2.4951, "step": 393 }, { "epoch": 0.05, "grad_norm": 0.353515625, "learning_rate": 4.999996415889708e-05, "loss": 2.4984, "step": 394 }, { "epoch": 0.05, "grad_norm": 0.33203125, "learning_rate": 4.9999960485185005e-05, "loss": 2.4765, "step": 395 }, { "epoch": 0.05, "grad_norm": 0.341796875, "learning_rate": 4.999995663226765e-05, "loss": 2.4805, "step": 396 }, { "epoch": 0.05, "grad_norm": 0.302734375, "learning_rate": 4.9999952600145053e-05, "loss": 2.4272, "step": 397 }, { "epoch": 0.05, "grad_norm": 0.33203125, "learning_rate": 4.9999948388817234e-05, "loss": 2.4595, "step": 398 }, { "epoch": 0.05, "grad_norm": 0.3359375, "learning_rate": 4.9999943998284226e-05, "loss": 2.4963, "step": 399 }, { "epoch": 0.05, "grad_norm": 0.314453125, "learning_rate": 4.999993942854606e-05, "loss": 2.4454, "step": 400 }, { "epoch": 0.05, "grad_norm": 0.3359375, "learning_rate": 4.999993467960278e-05, "loss": 2.4449, "step": 401 }, { "epoch": 0.05, "grad_norm": 0.34765625, "learning_rate": 4.99999297514544e-05, "loss": 2.4702, "step": 402 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.999992464410097e-05, "loss": 2.4596, "step": 403 }, { "epoch": 0.05, "grad_norm": 0.337890625, "learning_rate": 4.999991935754252e-05, "loss": 2.4787, "step": 404 }, { "epoch": 0.05, "grad_norm": 0.345703125, "learning_rate": 4.999991389177911e-05, "loss": 2.452, "step": 405 }, { "epoch": 0.05, "grad_norm": 0.333984375, "learning_rate": 4.9999908246810734e-05, "loss": 2.4626, "step": 406 }, { "epoch": 0.05, "grad_norm": 0.34375, "learning_rate": 4.999990242263747e-05, "loss": 2.4937, "step": 407 }, { "epoch": 0.05, "grad_norm": 0.322265625, "learning_rate": 4.999989641925935e-05, "loss": 2.4631, "step": 408 }, { "epoch": 0.05, "grad_norm": 0.3359375, "learning_rate": 4.9999890236676416e-05, "loss": 2.4748, "step": 409 }, { "epoch": 0.05, "grad_norm": 0.30859375, "learning_rate": 4.9999883874888705e-05, "loss": 2.4776, "step": 410 }, { "epoch": 0.05, "grad_norm": 0.337890625, "learning_rate": 4.9999877333896274e-05, "loss": 2.4774, "step": 411 }, { "epoch": 0.05, "grad_norm": 0.3359375, "learning_rate": 4.999987061369916e-05, "loss": 2.422, "step": 412 }, { "epoch": 0.06, "grad_norm": 0.349609375, "learning_rate": 4.9999863714297424e-05, "loss": 2.4393, "step": 413 }, { "epoch": 0.06, "grad_norm": 0.322265625, "learning_rate": 4.99998566356911e-05, "loss": 2.444, "step": 414 }, { "epoch": 0.06, "grad_norm": 0.33984375, "learning_rate": 4.999984937788025e-05, "loss": 2.4959, "step": 415 }, { "epoch": 0.06, "grad_norm": 0.337890625, "learning_rate": 4.999984194086493e-05, "loss": 2.4472, "step": 416 }, { "epoch": 0.06, "grad_norm": 0.34765625, "learning_rate": 4.999983432464518e-05, "loss": 2.4237, "step": 417 }, { "epoch": 0.06, "grad_norm": 0.34375, "learning_rate": 4.999982652922105e-05, "loss": 2.4664, "step": 418 }, { "epoch": 0.06, "grad_norm": 0.359375, "learning_rate": 4.999981855459262e-05, "loss": 2.4456, "step": 419 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.9999810400759926e-05, "loss": 2.456, "step": 420 }, { "epoch": 0.06, "grad_norm": 0.3203125, "learning_rate": 4.9999802067723036e-05, "loss": 2.4473, "step": 421 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.999979355548201e-05, "loss": 2.4524, "step": 422 }, { "epoch": 0.06, "grad_norm": 0.3359375, "learning_rate": 4.99997848640369e-05, "loss": 2.4474, "step": 423 }, { "epoch": 0.06, "grad_norm": 0.365234375, "learning_rate": 4.999977599338778e-05, "loss": 2.4648, "step": 424 }, { "epoch": 0.06, "grad_norm": 0.349609375, "learning_rate": 4.999976694353471e-05, "loss": 2.4788, "step": 425 }, { "epoch": 0.06, "grad_norm": 0.306640625, "learning_rate": 4.9999757714477743e-05, "loss": 2.4148, "step": 426 }, { "epoch": 0.06, "grad_norm": 0.333984375, "learning_rate": 4.9999748306216966e-05, "loss": 2.4252, "step": 427 }, { "epoch": 0.06, "grad_norm": 0.34765625, "learning_rate": 4.999973871875243e-05, "loss": 2.5082, "step": 428 }, { "epoch": 0.06, "grad_norm": 0.33203125, "learning_rate": 4.9999728952084215e-05, "loss": 2.4436, "step": 429 }, { "epoch": 0.06, "grad_norm": 0.33203125, "learning_rate": 4.9999719006212384e-05, "loss": 2.4537, "step": 430 }, { "epoch": 0.06, "grad_norm": 0.322265625, "learning_rate": 4.999970888113701e-05, "loss": 2.4543, "step": 431 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.9999698576858165e-05, "loss": 2.4483, "step": 432 }, { "epoch": 0.06, "grad_norm": 0.353515625, "learning_rate": 4.999968809337592e-05, "loss": 2.4678, "step": 433 }, { "epoch": 0.06, "grad_norm": 0.30859375, "learning_rate": 4.9999677430690354e-05, "loss": 2.4624, "step": 434 }, { "epoch": 0.06, "grad_norm": 0.328125, "learning_rate": 4.9999666588801545e-05, "loss": 2.4404, "step": 435 }, { "epoch": 0.06, "grad_norm": 0.33984375, "learning_rate": 4.999965556770957e-05, "loss": 2.4367, "step": 436 }, { "epoch": 0.06, "grad_norm": 0.33203125, "learning_rate": 4.999964436741451e-05, "loss": 2.4701, "step": 437 }, { "epoch": 0.06, "grad_norm": 0.3359375, "learning_rate": 4.9999632987916436e-05, "loss": 2.4245, "step": 438 }, { "epoch": 0.06, "grad_norm": 0.34765625, "learning_rate": 4.999962142921544e-05, "loss": 2.4417, "step": 439 }, { "epoch": 0.06, "grad_norm": 0.326171875, "learning_rate": 4.999960969131159e-05, "loss": 2.4763, "step": 440 }, { "epoch": 0.06, "grad_norm": 0.357421875, "learning_rate": 4.9999597774204993e-05, "loss": 2.4294, "step": 441 }, { "epoch": 0.06, "grad_norm": 0.32421875, "learning_rate": 4.9999585677895715e-05, "loss": 2.422, "step": 442 }, { "epoch": 0.06, "grad_norm": 0.3359375, "learning_rate": 4.9999573402383856e-05, "loss": 2.487, "step": 443 }, { "epoch": 0.06, "grad_norm": 0.32421875, "learning_rate": 4.9999560947669486e-05, "loss": 2.467, "step": 444 }, { "epoch": 0.06, "grad_norm": 0.31640625, "learning_rate": 4.999954831375272e-05, "loss": 2.4703, "step": 445 }, { "epoch": 0.06, "grad_norm": 0.333984375, "learning_rate": 4.999953550063362e-05, "loss": 2.4659, "step": 446 }, { "epoch": 0.06, "grad_norm": 0.314453125, "learning_rate": 4.99995225083123e-05, "loss": 2.4673, "step": 447 }, { "epoch": 0.06, "grad_norm": 0.314453125, "learning_rate": 4.999950933678885e-05, "loss": 2.4708, "step": 448 }, { "epoch": 0.06, "grad_norm": 0.337890625, "learning_rate": 4.999949598606336e-05, "loss": 2.4428, "step": 449 }, { "epoch": 0.06, "grad_norm": 0.3203125, "learning_rate": 4.9999482456135914e-05, "loss": 2.4399, "step": 450 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.999946874700663e-05, "loss": 2.4468, "step": 451 }, { "epoch": 0.06, "grad_norm": 0.3125, "learning_rate": 4.99994548586756e-05, "loss": 2.4038, "step": 452 }, { "epoch": 0.06, "grad_norm": 0.314453125, "learning_rate": 4.999944079114291e-05, "loss": 2.4227, "step": 453 }, { "epoch": 0.06, "grad_norm": 0.310546875, "learning_rate": 4.999942654440868e-05, "loss": 2.4426, "step": 454 }, { "epoch": 0.06, "grad_norm": 0.328125, "learning_rate": 4.9999412118473e-05, "loss": 2.4387, "step": 455 }, { "epoch": 0.06, "grad_norm": 0.322265625, "learning_rate": 4.999939751333597e-05, "loss": 2.4535, "step": 456 }, { "epoch": 0.06, "grad_norm": 0.310546875, "learning_rate": 4.999938272899771e-05, "loss": 2.4467, "step": 457 }, { "epoch": 0.06, "grad_norm": 0.302734375, "learning_rate": 4.999936776545832e-05, "loss": 2.4311, "step": 458 }, { "epoch": 0.06, "grad_norm": 0.34375, "learning_rate": 4.999935262271789e-05, "loss": 2.4394, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.3203125, "learning_rate": 4.9999337300776563e-05, "loss": 2.4531, "step": 460 }, { "epoch": 0.06, "grad_norm": 0.333984375, "learning_rate": 4.999932179963441e-05, "loss": 2.4305, "step": 461 }, { "epoch": 0.06, "grad_norm": 0.306640625, "learning_rate": 4.999930611929158e-05, "loss": 2.4171, "step": 462 }, { "epoch": 0.06, "grad_norm": 0.298828125, "learning_rate": 4.9999290259748156e-05, "loss": 2.4444, "step": 463 }, { "epoch": 0.06, "grad_norm": 0.291015625, "learning_rate": 4.9999274221004255e-05, "loss": 2.415, "step": 464 }, { "epoch": 0.06, "grad_norm": 0.328125, "learning_rate": 4.999925800306001e-05, "loss": 2.4374, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.9999241605915525e-05, "loss": 2.4307, "step": 466 }, { "epoch": 0.06, "grad_norm": 0.314453125, "learning_rate": 4.999922502957092e-05, "loss": 2.4098, "step": 467 }, { "epoch": 0.06, "grad_norm": 0.31640625, "learning_rate": 4.999920827402631e-05, "loss": 2.4486, "step": 468 }, { "epoch": 0.06, "grad_norm": 0.322265625, "learning_rate": 4.9999191339281814e-05, "loss": 2.4621, "step": 469 }, { "epoch": 0.06, "grad_norm": 0.314453125, "learning_rate": 4.999917422533757e-05, "loss": 2.4437, "step": 470 }, { "epoch": 0.06, "grad_norm": 0.33984375, "learning_rate": 4.999915693219368e-05, "loss": 2.4115, "step": 471 }, { "epoch": 0.06, "grad_norm": 0.318359375, "learning_rate": 4.999913945985028e-05, "loss": 2.4531, "step": 472 }, { "epoch": 0.06, "grad_norm": 0.302734375, "learning_rate": 4.999912180830748e-05, "loss": 2.446, "step": 473 }, { "epoch": 0.06, "grad_norm": 0.3203125, "learning_rate": 4.999910397756542e-05, "loss": 2.4079, "step": 474 }, { "epoch": 0.06, "grad_norm": 0.3359375, "learning_rate": 4.9999085967624235e-05, "loss": 2.4357, "step": 475 }, { "epoch": 0.06, "grad_norm": 0.333984375, "learning_rate": 4.9999067778484045e-05, "loss": 2.4454, "step": 476 }, { "epoch": 0.06, "grad_norm": 0.31640625, "learning_rate": 4.999904941014497e-05, "loss": 2.4493, "step": 477 }, { "epoch": 0.06, "grad_norm": 0.322265625, "learning_rate": 4.999903086260716e-05, "loss": 2.4223, "step": 478 }, { "epoch": 0.06, "grad_norm": 0.310546875, "learning_rate": 4.999901213587074e-05, "loss": 2.4414, "step": 479 }, { "epoch": 0.06, "grad_norm": 0.341796875, "learning_rate": 4.999899322993584e-05, "loss": 2.4335, "step": 480 }, { "epoch": 0.06, "grad_norm": 0.30859375, "learning_rate": 4.99989741448026e-05, "loss": 2.4118, "step": 481 }, { "epoch": 0.06, "grad_norm": 0.326171875, "learning_rate": 4.999895488047115e-05, "loss": 2.4261, "step": 482 }, { "epoch": 0.06, "grad_norm": 0.326171875, "learning_rate": 4.999893543694164e-05, "loss": 2.4279, "step": 483 }, { "epoch": 0.06, "grad_norm": 0.326171875, "learning_rate": 4.999891581421421e-05, "loss": 2.4357, "step": 484 }, { "epoch": 0.06, "grad_norm": 0.32421875, "learning_rate": 4.999889601228899e-05, "loss": 2.4127, "step": 485 }, { "epoch": 0.06, "grad_norm": 0.328125, "learning_rate": 4.999887603116613e-05, "loss": 2.4008, "step": 486 }, { "epoch": 0.06, "grad_norm": 0.32421875, "learning_rate": 4.9998855870845764e-05, "loss": 2.4489, "step": 487 }, { "epoch": 0.07, "grad_norm": 0.333984375, "learning_rate": 4.999883553132804e-05, "loss": 2.413, "step": 488 }, { "epoch": 0.07, "grad_norm": 0.3203125, "learning_rate": 4.999881501261311e-05, "loss": 2.4092, "step": 489 }, { "epoch": 0.07, "grad_norm": 0.3359375, "learning_rate": 4.9998794314701116e-05, "loss": 2.4072, "step": 490 }, { "epoch": 0.07, "grad_norm": 0.310546875, "learning_rate": 4.999877343759221e-05, "loss": 2.4306, "step": 491 }, { "epoch": 0.07, "grad_norm": 0.337890625, "learning_rate": 4.9998752381286537e-05, "loss": 2.4229, "step": 492 }, { "epoch": 0.07, "grad_norm": 0.322265625, "learning_rate": 4.999873114578425e-05, "loss": 2.4165, "step": 493 }, { "epoch": 0.07, "grad_norm": 1.1640625, "learning_rate": 4.99987097310855e-05, "loss": 2.4566, "step": 494 }, { "epoch": 0.07, "grad_norm": 0.33203125, "learning_rate": 4.9998688137190444e-05, "loss": 2.4416, "step": 495 }, { "epoch": 0.07, "grad_norm": 0.3203125, "learning_rate": 4.9998666364099226e-05, "loss": 2.4223, "step": 496 }, { "epoch": 0.07, "grad_norm": 0.337890625, "learning_rate": 4.9998644411812016e-05, "loss": 2.3991, "step": 497 }, { "epoch": 0.07, "grad_norm": 0.306640625, "learning_rate": 4.999862228032897e-05, "loss": 2.4189, "step": 498 }, { "epoch": 0.07, "grad_norm": 0.31640625, "learning_rate": 4.999859996965024e-05, "loss": 2.4393, "step": 499 }, { "epoch": 0.07, "grad_norm": 0.3515625, "learning_rate": 4.9998577479775985e-05, "loss": 2.4158, "step": 500 }, { "epoch": 0.07, "grad_norm": 0.357421875, "learning_rate": 4.9998554810706374e-05, "loss": 2.4186, "step": 501 }, { "epoch": 0.07, "grad_norm": 0.345703125, "learning_rate": 4.999853196244156e-05, "loss": 2.4163, "step": 502 }, { "epoch": 0.07, "grad_norm": 0.3359375, "learning_rate": 4.999850893498172e-05, "loss": 2.4051, "step": 503 }, { "epoch": 0.07, "grad_norm": 0.36328125, "learning_rate": 4.9998485728327004e-05, "loss": 2.4073, "step": 504 }, { "epoch": 0.07, "grad_norm": 0.3359375, "learning_rate": 4.9998462342477594e-05, "loss": 2.4225, "step": 505 }, { "epoch": 0.07, "grad_norm": 0.3515625, "learning_rate": 4.999843877743364e-05, "loss": 2.3949, "step": 506 }, { "epoch": 0.07, "grad_norm": 0.349609375, "learning_rate": 4.999841503319532e-05, "loss": 2.45, "step": 507 }, { "epoch": 0.07, "grad_norm": 0.33984375, "learning_rate": 4.999839110976281e-05, "loss": 2.4245, "step": 508 }, { "epoch": 0.07, "grad_norm": 0.32421875, "learning_rate": 4.999836700713627e-05, "loss": 2.4376, "step": 509 }, { "epoch": 0.07, "grad_norm": 0.328125, "learning_rate": 4.9998342725315885e-05, "loss": 2.3907, "step": 510 }, { "epoch": 0.07, "grad_norm": 0.345703125, "learning_rate": 4.999831826430181e-05, "loss": 2.4216, "step": 511 }, { "epoch": 0.07, "grad_norm": 0.33984375, "learning_rate": 4.999829362409425e-05, "loss": 2.4241, "step": 512 }, { "epoch": 0.07, "grad_norm": 0.33203125, "learning_rate": 4.9998268804693346e-05, "loss": 2.3939, "step": 513 }, { "epoch": 0.07, "grad_norm": 0.333984375, "learning_rate": 4.999824380609931e-05, "loss": 2.4205, "step": 514 }, { "epoch": 0.07, "grad_norm": 0.314453125, "learning_rate": 4.99982186283123e-05, "loss": 2.43, "step": 515 }, { "epoch": 0.07, "grad_norm": 0.328125, "learning_rate": 4.99981932713325e-05, "loss": 2.4262, "step": 516 }, { "epoch": 0.07, "grad_norm": 0.3359375, "learning_rate": 4.9998167735160097e-05, "loss": 2.4259, "step": 517 }, { "epoch": 0.07, "grad_norm": 0.337890625, "learning_rate": 4.999814201979527e-05, "loss": 2.4065, "step": 518 }, { "epoch": 0.07, "grad_norm": 0.31640625, "learning_rate": 4.99981161252382e-05, "loss": 2.4256, "step": 519 }, { "epoch": 0.07, "grad_norm": 0.34375, "learning_rate": 4.999809005148908e-05, "loss": 2.4142, "step": 520 }, { "epoch": 0.07, "grad_norm": 0.31640625, "learning_rate": 4.999806379854809e-05, "loss": 2.4334, "step": 521 }, { "epoch": 0.07, "grad_norm": 0.322265625, "learning_rate": 4.999803736641543e-05, "loss": 2.4416, "step": 522 }, { "epoch": 0.07, "grad_norm": 0.29296875, "learning_rate": 4.9998010755091274e-05, "loss": 2.4468, "step": 523 }, { "epoch": 0.07, "grad_norm": 0.3203125, "learning_rate": 4.999798396457582e-05, "loss": 2.4448, "step": 524 }, { "epoch": 0.07, "grad_norm": 0.322265625, "learning_rate": 4.9997956994869264e-05, "loss": 2.4284, "step": 525 }, { "epoch": 0.07, "grad_norm": 0.322265625, "learning_rate": 4.999792984597179e-05, "loss": 2.4007, "step": 526 }, { "epoch": 0.07, "grad_norm": 0.294921875, "learning_rate": 4.9997902517883604e-05, "loss": 2.4033, "step": 527 }, { "epoch": 0.07, "grad_norm": 0.341796875, "learning_rate": 4.9997875010604896e-05, "loss": 2.4038, "step": 528 }, { "epoch": 0.07, "grad_norm": 0.29296875, "learning_rate": 4.999784732413586e-05, "loss": 2.4085, "step": 529 }, { "epoch": 0.07, "grad_norm": 0.3125, "learning_rate": 4.9997819458476704e-05, "loss": 2.4123, "step": 530 }, { "epoch": 0.07, "grad_norm": 0.33203125, "learning_rate": 4.999779141362761e-05, "loss": 2.4266, "step": 531 }, { "epoch": 0.07, "grad_norm": 0.32421875, "learning_rate": 4.99977631895888e-05, "loss": 2.4183, "step": 532 }, { "epoch": 0.07, "grad_norm": 0.318359375, "learning_rate": 4.999773478636046e-05, "loss": 2.4452, "step": 533 }, { "epoch": 0.07, "grad_norm": 0.330078125, "learning_rate": 4.9997706203942807e-05, "loss": 2.4324, "step": 534 }, { "epoch": 0.07, "grad_norm": 0.330078125, "learning_rate": 4.9997677442336036e-05, "loss": 2.4055, "step": 535 }, { "epoch": 0.07, "grad_norm": 0.318359375, "learning_rate": 4.999764850154035e-05, "loss": 2.4343, "step": 536 }, { "epoch": 0.07, "grad_norm": 0.328125, "learning_rate": 4.999761938155597e-05, "loss": 2.437, "step": 537 }, { "epoch": 0.07, "grad_norm": 0.3046875, "learning_rate": 4.999759008238309e-05, "loss": 2.4201, "step": 538 }, { "epoch": 0.07, "grad_norm": 0.314453125, "learning_rate": 4.999756060402194e-05, "loss": 2.384, "step": 539 }, { "epoch": 0.07, "grad_norm": 0.30859375, "learning_rate": 4.999753094647271e-05, "loss": 2.3963, "step": 540 }, { "epoch": 0.07, "grad_norm": 0.306640625, "learning_rate": 4.9997501109735625e-05, "loss": 2.438, "step": 541 }, { "epoch": 0.07, "grad_norm": 0.34765625, "learning_rate": 4.9997471093810896e-05, "loss": 2.4763, "step": 542 }, { "epoch": 0.07, "grad_norm": 0.322265625, "learning_rate": 4.999744089869873e-05, "loss": 2.3999, "step": 543 }, { "epoch": 0.07, "grad_norm": 0.318359375, "learning_rate": 4.9997410524399355e-05, "loss": 2.4293, "step": 544 }, { "epoch": 0.07, "grad_norm": 0.337890625, "learning_rate": 4.9997379970912986e-05, "loss": 2.3826, "step": 545 }, { "epoch": 0.07, "grad_norm": 0.33203125, "learning_rate": 4.9997349238239845e-05, "loss": 2.4056, "step": 546 }, { "epoch": 0.07, "grad_norm": 0.3125, "learning_rate": 4.9997318326380144e-05, "loss": 2.4119, "step": 547 }, { "epoch": 0.07, "grad_norm": 0.37890625, "learning_rate": 4.9997287235334106e-05, "loss": 2.4329, "step": 548 }, { "epoch": 0.07, "grad_norm": 0.33984375, "learning_rate": 4.999725596510196e-05, "loss": 2.4019, "step": 549 }, { "epoch": 0.07, "grad_norm": 0.34375, "learning_rate": 4.9997224515683926e-05, "loss": 2.4228, "step": 550 }, { "epoch": 0.07, "grad_norm": 0.310546875, "learning_rate": 4.9997192887080226e-05, "loss": 2.3982, "step": 551 }, { "epoch": 0.07, "grad_norm": 0.349609375, "learning_rate": 4.9997161079291096e-05, "loss": 2.4167, "step": 552 }, { "epoch": 0.07, "grad_norm": 0.328125, "learning_rate": 4.9997129092316754e-05, "loss": 2.3809, "step": 553 }, { "epoch": 0.07, "grad_norm": 0.341796875, "learning_rate": 4.999709692615744e-05, "loss": 2.3917, "step": 554 }, { "epoch": 0.07, "grad_norm": 0.306640625, "learning_rate": 4.9997064580813376e-05, "loss": 2.4344, "step": 555 }, { "epoch": 0.07, "grad_norm": 0.28515625, "learning_rate": 4.9997032056284794e-05, "loss": 2.4098, "step": 556 }, { "epoch": 0.07, "grad_norm": 0.306640625, "learning_rate": 4.999699935257193e-05, "loss": 2.4306, "step": 557 }, { "epoch": 0.07, "grad_norm": 0.33984375, "learning_rate": 4.999696646967502e-05, "loss": 2.4201, "step": 558 }, { "epoch": 0.07, "grad_norm": 0.33984375, "learning_rate": 4.999693340759429e-05, "loss": 2.4265, "step": 559 }, { "epoch": 0.07, "grad_norm": 0.3203125, "learning_rate": 4.999690016633e-05, "loss": 2.3932, "step": 560 }, { "epoch": 0.07, "grad_norm": 0.3046875, "learning_rate": 4.999686674588236e-05, "loss": 2.4351, "step": 561 }, { "epoch": 0.07, "grad_norm": 0.3203125, "learning_rate": 4.999683314625163e-05, "loss": 2.4397, "step": 562 }, { "epoch": 0.08, "grad_norm": 0.318359375, "learning_rate": 4.999679936743804e-05, "loss": 2.4029, "step": 563 }, { "epoch": 0.08, "grad_norm": 0.310546875, "learning_rate": 4.9996765409441835e-05, "loss": 2.4108, "step": 564 }, { "epoch": 0.08, "grad_norm": 0.287109375, "learning_rate": 4.9996731272263264e-05, "loss": 2.4101, "step": 565 }, { "epoch": 0.08, "grad_norm": 0.30859375, "learning_rate": 4.9996696955902564e-05, "loss": 2.4148, "step": 566 }, { "epoch": 0.08, "grad_norm": 0.310546875, "learning_rate": 4.999666246035998e-05, "loss": 2.386, "step": 567 }, { "epoch": 0.08, "grad_norm": 0.306640625, "learning_rate": 4.9996627785635766e-05, "loss": 2.4212, "step": 568 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.999659293173017e-05, "loss": 2.3964, "step": 569 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 4.9996557898643435e-05, "loss": 2.3764, "step": 570 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.999652268637581e-05, "loss": 2.4112, "step": 571 }, { "epoch": 0.08, "grad_norm": 0.296875, "learning_rate": 4.999648729492756e-05, "loss": 2.3876, "step": 572 }, { "epoch": 0.08, "grad_norm": 0.291015625, "learning_rate": 4.9996451724298945e-05, "loss": 2.382, "step": 573 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.999641597449019e-05, "loss": 2.3936, "step": 574 }, { "epoch": 0.08, "grad_norm": 0.34375, "learning_rate": 4.999638004550158e-05, "loss": 2.3724, "step": 575 }, { "epoch": 0.08, "grad_norm": 0.2890625, "learning_rate": 4.999634393733336e-05, "loss": 2.4187, "step": 576 }, { "epoch": 0.08, "grad_norm": 0.333984375, "learning_rate": 4.999630764998578e-05, "loss": 2.4161, "step": 577 }, { "epoch": 0.08, "grad_norm": 0.330078125, "learning_rate": 4.999627118345912e-05, "loss": 2.4031, "step": 578 }, { "epoch": 0.08, "grad_norm": 0.318359375, "learning_rate": 4.9996234537753626e-05, "loss": 2.4278, "step": 579 }, { "epoch": 0.08, "grad_norm": 0.3359375, "learning_rate": 4.999619771286957e-05, "loss": 2.4109, "step": 580 }, { "epoch": 0.08, "grad_norm": 0.333984375, "learning_rate": 4.999616070880721e-05, "loss": 2.4255, "step": 581 }, { "epoch": 0.08, "grad_norm": 0.3203125, "learning_rate": 4.999612352556682e-05, "loss": 2.4201, "step": 582 }, { "epoch": 0.08, "grad_norm": 0.322265625, "learning_rate": 4.999608616314865e-05, "loss": 2.3959, "step": 583 }, { "epoch": 0.08, "grad_norm": 0.32421875, "learning_rate": 4.9996048621552985e-05, "loss": 2.4137, "step": 584 }, { "epoch": 0.08, "grad_norm": 0.322265625, "learning_rate": 4.999601090078009e-05, "loss": 2.4037, "step": 585 }, { "epoch": 0.08, "grad_norm": 0.298828125, "learning_rate": 4.999597300083022e-05, "loss": 2.4169, "step": 586 }, { "epoch": 0.08, "grad_norm": 0.30078125, "learning_rate": 4.999593492170367e-05, "loss": 2.4171, "step": 587 }, { "epoch": 0.08, "grad_norm": 0.314453125, "learning_rate": 4.99958966634007e-05, "loss": 2.417, "step": 588 }, { "epoch": 0.08, "grad_norm": 0.32421875, "learning_rate": 4.999585822592159e-05, "loss": 2.4217, "step": 589 }, { "epoch": 0.08, "grad_norm": 0.328125, "learning_rate": 4.99958196092666e-05, "loss": 2.4152, "step": 590 }, { "epoch": 0.08, "grad_norm": 0.310546875, "learning_rate": 4.999578081343603e-05, "loss": 2.4419, "step": 591 }, { "epoch": 0.08, "grad_norm": 0.3046875, "learning_rate": 4.999574183843014e-05, "loss": 2.3782, "step": 592 }, { "epoch": 0.08, "grad_norm": 0.318359375, "learning_rate": 4.9995702684249226e-05, "loss": 2.4133, "step": 593 }, { "epoch": 0.08, "grad_norm": 0.29296875, "learning_rate": 4.999566335089355e-05, "loss": 2.3926, "step": 594 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 4.9995623838363405e-05, "loss": 2.386, "step": 595 }, { "epoch": 0.08, "grad_norm": 0.310546875, "learning_rate": 4.999558414665907e-05, "loss": 2.3923, "step": 596 }, { "epoch": 0.08, "grad_norm": 0.318359375, "learning_rate": 4.999554427578083e-05, "loss": 2.3962, "step": 597 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 4.999550422572897e-05, "loss": 2.4087, "step": 598 }, { "epoch": 0.08, "grad_norm": 0.380859375, "learning_rate": 4.999546399650379e-05, "loss": 2.4402, "step": 599 }, { "epoch": 0.08, "grad_norm": 0.291015625, "learning_rate": 4.999542358810556e-05, "loss": 2.399, "step": 600 }, { "epoch": 0.08, "grad_norm": 0.314453125, "learning_rate": 4.999538300053458e-05, "loss": 2.3878, "step": 601 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.999534223379114e-05, "loss": 2.3778, "step": 602 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.9995301287875526e-05, "loss": 2.4006, "step": 603 }, { "epoch": 0.08, "grad_norm": 0.322265625, "learning_rate": 4.9995260162788046e-05, "loss": 2.3839, "step": 604 }, { "epoch": 0.08, "grad_norm": 0.3359375, "learning_rate": 4.9995218858528975e-05, "loss": 2.4129, "step": 605 }, { "epoch": 0.08, "grad_norm": 0.306640625, "learning_rate": 4.999517737509862e-05, "loss": 2.4279, "step": 606 }, { "epoch": 0.08, "grad_norm": 0.33984375, "learning_rate": 4.999513571249728e-05, "loss": 2.4208, "step": 607 }, { "epoch": 0.08, "grad_norm": 0.30859375, "learning_rate": 4.999509387072525e-05, "loss": 2.4424, "step": 608 }, { "epoch": 0.08, "grad_norm": 0.333984375, "learning_rate": 4.9995051849782834e-05, "loss": 2.3839, "step": 609 }, { "epoch": 0.08, "grad_norm": 0.296875, "learning_rate": 4.999500964967033e-05, "loss": 2.4247, "step": 610 }, { "epoch": 0.08, "grad_norm": 0.3046875, "learning_rate": 4.9994967270388036e-05, "loss": 2.4049, "step": 611 }, { "epoch": 0.08, "grad_norm": 0.30078125, "learning_rate": 4.9994924711936265e-05, "loss": 2.3906, "step": 612 }, { "epoch": 0.08, "grad_norm": 0.3046875, "learning_rate": 4.9994881974315314e-05, "loss": 2.4079, "step": 613 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 4.999483905752549e-05, "loss": 2.4092, "step": 614 }, { "epoch": 0.08, "grad_norm": 0.306640625, "learning_rate": 4.999479596156711e-05, "loss": 2.4239, "step": 615 }, { "epoch": 0.08, "grad_norm": 0.330078125, "learning_rate": 4.9994752686440474e-05, "loss": 2.383, "step": 616 }, { "epoch": 0.08, "grad_norm": 0.3203125, "learning_rate": 4.9994709232145895e-05, "loss": 2.3732, "step": 617 }, { "epoch": 0.08, "grad_norm": 0.341796875, "learning_rate": 4.999466559868368e-05, "loss": 2.3701, "step": 618 }, { "epoch": 0.08, "grad_norm": 0.349609375, "learning_rate": 4.999462178605415e-05, "loss": 2.3856, "step": 619 }, { "epoch": 0.08, "grad_norm": 0.3046875, "learning_rate": 4.999457779425761e-05, "loss": 2.3858, "step": 620 }, { "epoch": 0.08, "grad_norm": 0.330078125, "learning_rate": 4.999453362329439e-05, "loss": 2.3873, "step": 621 }, { "epoch": 0.08, "grad_norm": 0.3203125, "learning_rate": 4.999448927316479e-05, "loss": 2.4028, "step": 622 }, { "epoch": 0.08, "grad_norm": 0.32421875, "learning_rate": 4.999444474386914e-05, "loss": 2.3778, "step": 623 }, { "epoch": 0.08, "grad_norm": 0.30859375, "learning_rate": 4.999440003540775e-05, "loss": 2.3731, "step": 624 }, { "epoch": 0.08, "grad_norm": 0.314453125, "learning_rate": 4.9994355147780944e-05, "loss": 2.3986, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.33203125, "learning_rate": 4.9994310080989046e-05, "loss": 2.4022, "step": 626 }, { "epoch": 0.08, "grad_norm": 0.3359375, "learning_rate": 4.999426483503237e-05, "loss": 2.3975, "step": 627 }, { "epoch": 0.08, "grad_norm": 0.302734375, "learning_rate": 4.9994219409911256e-05, "loss": 2.4051, "step": 628 }, { "epoch": 0.08, "grad_norm": 0.330078125, "learning_rate": 4.999417380562602e-05, "loss": 2.4257, "step": 629 }, { "epoch": 0.08, "grad_norm": 0.322265625, "learning_rate": 4.999412802217699e-05, "loss": 2.4136, "step": 630 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.9994082059564496e-05, "loss": 2.384, "step": 631 }, { "epoch": 0.08, "grad_norm": 0.30859375, "learning_rate": 4.999403591778886e-05, "loss": 2.3973, "step": 632 }, { "epoch": 0.08, "grad_norm": 0.310546875, "learning_rate": 4.999398959685042e-05, "loss": 2.3867, "step": 633 }, { "epoch": 0.08, "grad_norm": 0.3125, "learning_rate": 4.999394309674951e-05, "loss": 2.399, "step": 634 }, { "epoch": 0.08, "grad_norm": 0.3359375, "learning_rate": 4.9993896417486465e-05, "loss": 2.3901, "step": 635 }, { "epoch": 0.08, "grad_norm": 0.3046875, "learning_rate": 4.999384955906161e-05, "loss": 2.3866, "step": 636 }, { "epoch": 0.08, "grad_norm": 0.31640625, "learning_rate": 4.999380252147527e-05, "loss": 2.3938, "step": 637 }, { "epoch": 0.09, "grad_norm": 0.294921875, "learning_rate": 4.999375530472782e-05, "loss": 2.3909, "step": 638 }, { "epoch": 0.09, "grad_norm": 0.330078125, "learning_rate": 4.9993707908819564e-05, "loss": 2.4256, "step": 639 }, { "epoch": 0.09, "grad_norm": 0.3203125, "learning_rate": 4.9993660333750855e-05, "loss": 2.3891, "step": 640 }, { "epoch": 0.09, "grad_norm": 0.318359375, "learning_rate": 4.999361257952204e-05, "loss": 2.3729, "step": 641 }, { "epoch": 0.09, "grad_norm": 0.294921875, "learning_rate": 4.999356464613345e-05, "loss": 2.4003, "step": 642 }, { "epoch": 0.09, "grad_norm": 0.302734375, "learning_rate": 4.9993516533585436e-05, "loss": 2.3672, "step": 643 }, { "epoch": 0.09, "grad_norm": 0.31640625, "learning_rate": 4.9993468241878336e-05, "loss": 2.3881, "step": 644 }, { "epoch": 0.09, "grad_norm": 0.328125, "learning_rate": 4.999341977101249e-05, "loss": 2.4081, "step": 645 }, { "epoch": 0.09, "grad_norm": 0.2890625, "learning_rate": 4.999337112098827e-05, "loss": 2.4056, "step": 646 }, { "epoch": 0.09, "grad_norm": 0.294921875, "learning_rate": 4.9993322291806e-05, "loss": 2.3635, "step": 647 }, { "epoch": 0.09, "grad_norm": 0.3046875, "learning_rate": 4.999327328346605e-05, "loss": 2.4038, "step": 648 }, { "epoch": 0.09, "grad_norm": 0.291015625, "learning_rate": 4.999322409596875e-05, "loss": 2.4168, "step": 649 }, { "epoch": 0.09, "grad_norm": 0.3046875, "learning_rate": 4.999317472931447e-05, "loss": 2.3797, "step": 650 }, { "epoch": 0.09, "grad_norm": 0.322265625, "learning_rate": 4.9993125183503565e-05, "loss": 2.416, "step": 651 }, { "epoch": 0.09, "grad_norm": 0.287109375, "learning_rate": 4.999307545853638e-05, "loss": 2.3802, "step": 652 }, { "epoch": 0.09, "grad_norm": 0.345703125, "learning_rate": 4.999302555441326e-05, "loss": 2.3746, "step": 653 }, { "epoch": 0.09, "grad_norm": 1.78125, "learning_rate": 4.999297547113459e-05, "loss": 2.3671, "step": 654 }, { "epoch": 0.09, "grad_norm": 0.291015625, "learning_rate": 4.9992925208700714e-05, "loss": 2.3763, "step": 655 }, { "epoch": 0.09, "grad_norm": 0.31640625, "learning_rate": 4.9992874767111996e-05, "loss": 2.3951, "step": 656 }, { "epoch": 0.09, "grad_norm": 0.3359375, "learning_rate": 4.999282414636879e-05, "loss": 2.4, "step": 657 }, { "epoch": 0.09, "grad_norm": 0.314453125, "learning_rate": 4.999277334647147e-05, "loss": 2.404, "step": 658 }, { "epoch": 0.09, "grad_norm": 0.310546875, "learning_rate": 4.999272236742039e-05, "loss": 2.3681, "step": 659 }, { "epoch": 0.09, "grad_norm": 0.3046875, "learning_rate": 4.9992671209215926e-05, "loss": 2.38, "step": 660 }, { "epoch": 0.09, "grad_norm": 0.306640625, "learning_rate": 4.999261987185844e-05, "loss": 2.3908, "step": 661 }, { "epoch": 0.09, "grad_norm": 0.3515625, "learning_rate": 4.999256835534829e-05, "loss": 2.3688, "step": 662 }, { "epoch": 0.09, "grad_norm": 0.306640625, "learning_rate": 4.9992516659685865e-05, "loss": 2.379, "step": 663 }, { "epoch": 0.09, "grad_norm": 0.349609375, "learning_rate": 4.999246478487152e-05, "loss": 2.3708, "step": 664 }, { "epoch": 0.09, "grad_norm": 0.310546875, "learning_rate": 4.999241273090563e-05, "loss": 2.3568, "step": 665 }, { "epoch": 0.09, "grad_norm": 0.33984375, "learning_rate": 4.999236049778857e-05, "loss": 2.4089, "step": 666 }, { "epoch": 0.09, "grad_norm": 0.333984375, "learning_rate": 4.9992308085520716e-05, "loss": 2.3823, "step": 667 }, { "epoch": 0.09, "grad_norm": 0.302734375, "learning_rate": 4.999225549410245e-05, "loss": 2.4046, "step": 668 }, { "epoch": 0.09, "grad_norm": 0.3046875, "learning_rate": 4.9992202723534125e-05, "loss": 2.3975, "step": 669 }, { "epoch": 0.09, "grad_norm": 0.29296875, "learning_rate": 4.9992149773816144e-05, "loss": 2.3732, "step": 670 }, { "epoch": 0.09, "grad_norm": 0.33203125, "learning_rate": 4.9992096644948885e-05, "loss": 2.3774, "step": 671 }, { "epoch": 0.09, "grad_norm": 0.3125, "learning_rate": 4.999204333693271e-05, "loss": 2.3945, "step": 672 }, { "epoch": 0.09, "grad_norm": 0.330078125, "learning_rate": 4.999198984976802e-05, "loss": 2.39, "step": 673 }, { "epoch": 0.09, "grad_norm": 0.30859375, "learning_rate": 4.999193618345518e-05, "loss": 2.3866, "step": 674 }, { "epoch": 0.09, "grad_norm": 0.3359375, "learning_rate": 4.99918823379946e-05, "loss": 2.3609, "step": 675 }, { "epoch": 0.09, "grad_norm": 0.296875, "learning_rate": 4.999182831338665e-05, "loss": 2.3783, "step": 676 }, { "epoch": 0.09, "grad_norm": 0.294921875, "learning_rate": 4.9991774109631705e-05, "loss": 2.3949, "step": 677 }, { "epoch": 0.09, "grad_norm": 0.318359375, "learning_rate": 4.999171972673018e-05, "loss": 2.3755, "step": 678 }, { "epoch": 0.09, "grad_norm": 0.314453125, "learning_rate": 4.999166516468246e-05, "loss": 2.347, "step": 679 }, { "epoch": 0.09, "grad_norm": 0.3125, "learning_rate": 4.999161042348891e-05, "loss": 2.3837, "step": 680 }, { "epoch": 0.09, "grad_norm": 0.322265625, "learning_rate": 4.999155550314996e-05, "loss": 2.3555, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.640625, "learning_rate": 4.999150040366597e-05, "loss": 2.4063, "step": 682 }, { "epoch": 0.09, "grad_norm": 0.337890625, "learning_rate": 4.999144512503735e-05, "loss": 2.3869, "step": 683 }, { "epoch": 0.09, "grad_norm": 0.298828125, "learning_rate": 4.99913896672645e-05, "loss": 2.4093, "step": 684 }, { "epoch": 0.09, "grad_norm": 0.31640625, "learning_rate": 4.999133403034782e-05, "loss": 2.3969, "step": 685 }, { "epoch": 0.09, "grad_norm": 0.306640625, "learning_rate": 4.99912782142877e-05, "loss": 2.3922, "step": 686 }, { "epoch": 0.09, "grad_norm": 0.326171875, "learning_rate": 4.999122221908453e-05, "loss": 2.3839, "step": 687 }, { "epoch": 0.09, "grad_norm": 0.333984375, "learning_rate": 4.999116604473873e-05, "loss": 2.3933, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.32421875, "learning_rate": 4.99911096912507e-05, "loss": 2.3906, "step": 689 }, { "epoch": 0.09, "grad_norm": 0.318359375, "learning_rate": 4.9991053158620846e-05, "loss": 2.4046, "step": 690 }, { "epoch": 0.09, "grad_norm": 0.310546875, "learning_rate": 4.999099644684956e-05, "loss": 2.3635, "step": 691 }, { "epoch": 0.09, "grad_norm": 0.318359375, "learning_rate": 4.999093955593726e-05, "loss": 2.4143, "step": 692 }, { "epoch": 0.09, "grad_norm": 0.30078125, "learning_rate": 4.999088248588435e-05, "loss": 2.3838, "step": 693 }, { "epoch": 0.09, "grad_norm": 0.326171875, "learning_rate": 4.999082523669123e-05, "loss": 2.399, "step": 694 }, { "epoch": 0.09, "grad_norm": 0.337890625, "learning_rate": 4.999076780835833e-05, "loss": 2.3942, "step": 695 }, { "epoch": 0.09, "grad_norm": 0.310546875, "learning_rate": 4.999071020088605e-05, "loss": 2.3659, "step": 696 }, { "epoch": 0.09, "grad_norm": 0.333984375, "learning_rate": 4.99906524142748e-05, "loss": 2.3907, "step": 697 }, { "epoch": 0.09, "grad_norm": 0.30078125, "learning_rate": 4.9990594448525e-05, "loss": 2.3401, "step": 698 }, { "epoch": 0.09, "grad_norm": 0.306640625, "learning_rate": 4.999053630363706e-05, "loss": 2.4116, "step": 699 }, { "epoch": 0.09, "grad_norm": 0.306640625, "learning_rate": 4.9990477979611404e-05, "loss": 2.3859, "step": 700 }, { "epoch": 0.09, "grad_norm": 0.31640625, "learning_rate": 4.9990419476448444e-05, "loss": 2.4033, "step": 701 }, { "epoch": 0.09, "grad_norm": 0.33203125, "learning_rate": 4.999036079414861e-05, "loss": 2.362, "step": 702 }, { "epoch": 0.09, "grad_norm": 0.291015625, "learning_rate": 4.99903019327123e-05, "loss": 2.3745, "step": 703 }, { "epoch": 0.09, "grad_norm": 0.302734375, "learning_rate": 4.999024289213996e-05, "loss": 2.3914, "step": 704 }, { "epoch": 0.09, "grad_norm": 0.298828125, "learning_rate": 4.9990183672432014e-05, "loss": 2.3726, "step": 705 }, { "epoch": 0.09, "grad_norm": 0.3046875, "learning_rate": 4.999012427358886e-05, "loss": 2.3911, "step": 706 }, { "epoch": 0.09, "grad_norm": 0.30078125, "learning_rate": 4.999006469561094e-05, "loss": 2.4102, "step": 707 }, { "epoch": 0.09, "grad_norm": 0.31640625, "learning_rate": 4.9990004938498694e-05, "loss": 2.3978, "step": 708 }, { "epoch": 0.09, "grad_norm": 0.33984375, "learning_rate": 4.998994500225253e-05, "loss": 2.3679, "step": 709 }, { "epoch": 0.09, "grad_norm": 0.291015625, "learning_rate": 4.998988488687288e-05, "loss": 2.3659, "step": 710 }, { "epoch": 0.09, "grad_norm": 0.28515625, "learning_rate": 4.998982459236019e-05, "loss": 2.4028, "step": 711 }, { "epoch": 0.09, "grad_norm": 0.3125, "learning_rate": 4.9989764118714875e-05, "loss": 2.3936, "step": 712 }, { "epoch": 0.1, "grad_norm": 0.30078125, "learning_rate": 4.998970346593739e-05, "loss": 2.4009, "step": 713 }, { "epoch": 0.1, "grad_norm": 0.31640625, "learning_rate": 4.998964263402814e-05, "loss": 2.3522, "step": 714 }, { "epoch": 0.1, "grad_norm": 0.291015625, "learning_rate": 4.998958162298758e-05, "loss": 2.3744, "step": 715 }, { "epoch": 0.1, "grad_norm": 0.3046875, "learning_rate": 4.9989520432816144e-05, "loss": 2.3945, "step": 716 }, { "epoch": 0.1, "grad_norm": 0.29296875, "learning_rate": 4.998945906351427e-05, "loss": 2.3581, "step": 717 }, { "epoch": 0.1, "grad_norm": 0.30859375, "learning_rate": 4.99893975150824e-05, "loss": 2.3962, "step": 718 }, { "epoch": 0.1, "grad_norm": 0.330078125, "learning_rate": 4.9989335787520977e-05, "loss": 2.3586, "step": 719 }, { "epoch": 0.1, "grad_norm": 0.30859375, "learning_rate": 4.9989273880830435e-05, "loss": 2.3701, "step": 720 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.998921179501123e-05, "loss": 2.3656, "step": 721 }, { "epoch": 0.1, "grad_norm": 0.322265625, "learning_rate": 4.998914953006379e-05, "loss": 2.3778, "step": 722 }, { "epoch": 0.1, "grad_norm": 0.2890625, "learning_rate": 4.998908708598858e-05, "loss": 2.3601, "step": 723 }, { "epoch": 0.1, "grad_norm": 0.322265625, "learning_rate": 4.998902446278603e-05, "loss": 2.4138, "step": 724 }, { "epoch": 0.1, "grad_norm": 0.34375, "learning_rate": 4.99889616604566e-05, "loss": 2.4272, "step": 725 }, { "epoch": 0.1, "grad_norm": 0.35546875, "learning_rate": 4.9988898679000746e-05, "loss": 2.4095, "step": 726 }, { "epoch": 0.1, "grad_norm": 0.298828125, "learning_rate": 4.9988835518418906e-05, "loss": 2.4171, "step": 727 }, { "epoch": 0.1, "grad_norm": 0.310546875, "learning_rate": 4.998877217871154e-05, "loss": 2.3968, "step": 728 }, { "epoch": 0.1, "grad_norm": 0.291015625, "learning_rate": 4.99887086598791e-05, "loss": 2.4043, "step": 729 }, { "epoch": 0.1, "grad_norm": 0.322265625, "learning_rate": 4.9988644961922035e-05, "loss": 2.398, "step": 730 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.998858108484081e-05, "loss": 2.3796, "step": 731 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.998851702863589e-05, "loss": 2.4099, "step": 732 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.9988452793307714e-05, "loss": 2.3976, "step": 733 }, { "epoch": 0.1, "grad_norm": 0.3046875, "learning_rate": 4.998838837885677e-05, "loss": 2.3902, "step": 734 }, { "epoch": 0.1, "grad_norm": 0.349609375, "learning_rate": 4.9988323785283486e-05, "loss": 2.3552, "step": 735 }, { "epoch": 0.1, "grad_norm": 0.337890625, "learning_rate": 4.998825901258835e-05, "loss": 2.3758, "step": 736 }, { "epoch": 0.1, "grad_norm": 0.30078125, "learning_rate": 4.998819406077182e-05, "loss": 2.3604, "step": 737 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.9988128929834355e-05, "loss": 2.4, "step": 738 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.998806361977643e-05, "loss": 2.3713, "step": 739 }, { "epoch": 0.1, "grad_norm": 0.34765625, "learning_rate": 4.998799813059851e-05, "loss": 2.3395, "step": 740 }, { "epoch": 0.1, "grad_norm": 0.3203125, "learning_rate": 4.9987932462301066e-05, "loss": 2.3743, "step": 741 }, { "epoch": 0.1, "grad_norm": 0.330078125, "learning_rate": 4.998786661488457e-05, "loss": 2.3589, "step": 742 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.998780058834949e-05, "loss": 2.3817, "step": 743 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.998773438269629e-05, "loss": 2.3945, "step": 744 }, { "epoch": 0.1, "grad_norm": 0.33203125, "learning_rate": 4.998766799792547e-05, "loss": 2.4029, "step": 745 }, { "epoch": 0.1, "grad_norm": 0.318359375, "learning_rate": 4.998760143403749e-05, "loss": 2.3965, "step": 746 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.9987534691032824e-05, "loss": 2.3949, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.998746776891196e-05, "loss": 2.3824, "step": 748 }, { "epoch": 0.1, "grad_norm": 0.294921875, "learning_rate": 4.998740066767536e-05, "loss": 2.3906, "step": 749 }, { "epoch": 0.1, "grad_norm": 0.279296875, "learning_rate": 4.998733338732353e-05, "loss": 2.3788, "step": 750 }, { "epoch": 0.1, "grad_norm": 0.322265625, "learning_rate": 4.998726592785693e-05, "loss": 2.3518, "step": 751 }, { "epoch": 0.1, "grad_norm": 0.328125, "learning_rate": 4.998719828927606e-05, "loss": 2.3548, "step": 752 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.9987130471581395e-05, "loss": 2.3647, "step": 753 }, { "epoch": 0.1, "grad_norm": 0.287109375, "learning_rate": 4.998706247477343e-05, "loss": 2.3845, "step": 754 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.998699429885264e-05, "loss": 2.3803, "step": 755 }, { "epoch": 0.1, "grad_norm": 0.294921875, "learning_rate": 4.9986925943819515e-05, "loss": 2.3831, "step": 756 }, { "epoch": 0.1, "grad_norm": 0.30078125, "learning_rate": 4.998685740967456e-05, "loss": 2.3739, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.3125, "learning_rate": 4.9986788696418266e-05, "loss": 2.3852, "step": 758 }, { "epoch": 0.1, "grad_norm": 0.298828125, "learning_rate": 4.99867198040511e-05, "loss": 2.3798, "step": 759 }, { "epoch": 0.1, "grad_norm": 0.287109375, "learning_rate": 4.998665073257358e-05, "loss": 2.384, "step": 760 }, { "epoch": 0.1, "grad_norm": 0.294921875, "learning_rate": 4.9986581481986184e-05, "loss": 2.3996, "step": 761 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.9986512052289424e-05, "loss": 2.3907, "step": 762 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.998644244348379e-05, "loss": 2.3609, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.296875, "learning_rate": 4.998637265556979e-05, "loss": 2.3809, "step": 764 }, { "epoch": 0.1, "grad_norm": 0.28515625, "learning_rate": 4.998630268854791e-05, "loss": 2.3903, "step": 765 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.9986232542418656e-05, "loss": 2.3948, "step": 766 }, { "epoch": 0.1, "grad_norm": 0.3359375, "learning_rate": 4.998616221718254e-05, "loss": 2.396, "step": 767 }, { "epoch": 0.1, "grad_norm": 0.30078125, "learning_rate": 4.998609171284005e-05, "loss": 2.3556, "step": 768 }, { "epoch": 0.1, "grad_norm": 0.30859375, "learning_rate": 4.998602102939171e-05, "loss": 2.3533, "step": 769 }, { "epoch": 0.1, "grad_norm": 0.3046875, "learning_rate": 4.998595016683801e-05, "loss": 2.3545, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.322265625, "learning_rate": 4.998587912517947e-05, "loss": 2.3641, "step": 771 }, { "epoch": 0.1, "grad_norm": 0.318359375, "learning_rate": 4.998580790441659e-05, "loss": 2.4024, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.33984375, "learning_rate": 4.998573650454988e-05, "loss": 2.3718, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.296875, "learning_rate": 4.9985664925579864e-05, "loss": 2.3735, "step": 774 }, { "epoch": 0.1, "grad_norm": 0.318359375, "learning_rate": 4.998559316750705e-05, "loss": 2.372, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.314453125, "learning_rate": 4.998552123033194e-05, "loss": 2.3519, "step": 776 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.998544911405507e-05, "loss": 2.3705, "step": 777 }, { "epoch": 0.1, "grad_norm": 0.33203125, "learning_rate": 4.9985376818676946e-05, "loss": 2.3537, "step": 778 }, { "epoch": 0.1, "grad_norm": 0.298828125, "learning_rate": 4.998530434419808e-05, "loss": 2.3445, "step": 779 }, { "epoch": 0.1, "grad_norm": 0.306640625, "learning_rate": 4.9985231690619005e-05, "loss": 2.3704, "step": 780 }, { "epoch": 0.1, "grad_norm": 0.294921875, "learning_rate": 4.998515885794023e-05, "loss": 2.3619, "step": 781 }, { "epoch": 0.1, "grad_norm": 0.30078125, "learning_rate": 4.998508584616228e-05, "loss": 2.3582, "step": 782 }, { "epoch": 0.1, "grad_norm": 0.310546875, "learning_rate": 4.998501265528569e-05, "loss": 2.3304, "step": 783 }, { "epoch": 0.1, "grad_norm": 0.326171875, "learning_rate": 4.998493928531097e-05, "loss": 2.3835, "step": 784 }, { "epoch": 0.1, "grad_norm": 0.302734375, "learning_rate": 4.998486573623865e-05, "loss": 2.3823, "step": 785 }, { "epoch": 0.1, "grad_norm": 0.298828125, "learning_rate": 4.9984792008069264e-05, "loss": 2.359, "step": 786 }, { "epoch": 0.1, "grad_norm": 0.29296875, "learning_rate": 4.998471810080333e-05, "loss": 2.3825, "step": 787 }, { "epoch": 0.11, "grad_norm": 0.31640625, "learning_rate": 4.9984644014441386e-05, "loss": 2.3344, "step": 788 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.9984569748983955e-05, "loss": 2.3651, "step": 789 }, { "epoch": 0.11, "grad_norm": 0.3203125, "learning_rate": 4.998449530443158e-05, "loss": 2.3426, "step": 790 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.998442068078479e-05, "loss": 2.3782, "step": 791 }, { "epoch": 0.11, "grad_norm": 0.3046875, "learning_rate": 4.9984345878044116e-05, "loss": 2.3571, "step": 792 }, { "epoch": 0.11, "grad_norm": 0.2890625, "learning_rate": 4.9984270896210094e-05, "loss": 2.3319, "step": 793 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.998419573528327e-05, "loss": 2.39, "step": 794 }, { "epoch": 0.11, "grad_norm": 0.3125, "learning_rate": 4.998412039526417e-05, "loss": 2.395, "step": 795 }, { "epoch": 0.11, "grad_norm": 0.3125, "learning_rate": 4.9984044876153345e-05, "loss": 2.3515, "step": 796 }, { "epoch": 0.11, "grad_norm": 0.318359375, "learning_rate": 4.9983969177951325e-05, "loss": 2.3973, "step": 797 }, { "epoch": 0.11, "grad_norm": 0.298828125, "learning_rate": 4.998389330065867e-05, "loss": 2.3418, "step": 798 }, { "epoch": 0.11, "grad_norm": 0.298828125, "learning_rate": 4.998381724427591e-05, "loss": 2.3526, "step": 799 }, { "epoch": 0.11, "grad_norm": 0.294921875, "learning_rate": 4.99837410088036e-05, "loss": 2.3608, "step": 800 }, { "epoch": 0.11, "grad_norm": 0.2890625, "learning_rate": 4.9983664594242275e-05, "loss": 2.3643, "step": 801 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.998358800059249e-05, "loss": 2.3659, "step": 802 }, { "epoch": 0.11, "grad_norm": 0.318359375, "learning_rate": 4.9983511227854794e-05, "loss": 2.3106, "step": 803 }, { "epoch": 0.11, "grad_norm": 0.287109375, "learning_rate": 4.9983434276029734e-05, "loss": 2.328, "step": 804 }, { "epoch": 0.11, "grad_norm": 0.326171875, "learning_rate": 4.998335714511787e-05, "loss": 2.3492, "step": 805 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.9983279835119736e-05, "loss": 2.3582, "step": 806 }, { "epoch": 0.11, "grad_norm": 0.30078125, "learning_rate": 4.998320234603591e-05, "loss": 2.3842, "step": 807 }, { "epoch": 0.11, "grad_norm": 0.296875, "learning_rate": 4.998312467786693e-05, "loss": 2.3728, "step": 808 }, { "epoch": 0.11, "grad_norm": 0.3203125, "learning_rate": 4.9983046830613354e-05, "loss": 2.3826, "step": 809 }, { "epoch": 0.11, "grad_norm": 0.318359375, "learning_rate": 4.998296880427575e-05, "loss": 2.3863, "step": 810 }, { "epoch": 0.11, "grad_norm": 0.287109375, "learning_rate": 4.998289059885468e-05, "loss": 2.3865, "step": 811 }, { "epoch": 0.11, "grad_norm": 0.267578125, "learning_rate": 4.9982812214350684e-05, "loss": 2.3511, "step": 812 }, { "epoch": 0.11, "grad_norm": 0.337890625, "learning_rate": 4.998273365076434e-05, "loss": 2.3831, "step": 813 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.9982654908096216e-05, "loss": 2.3929, "step": 814 }, { "epoch": 0.11, "grad_norm": 0.30078125, "learning_rate": 4.998257598634686e-05, "loss": 2.391, "step": 815 }, { "epoch": 0.11, "grad_norm": 0.31640625, "learning_rate": 4.998249688551685e-05, "loss": 2.3757, "step": 816 }, { "epoch": 0.11, "grad_norm": 0.298828125, "learning_rate": 4.998241760560675e-05, "loss": 2.3768, "step": 817 }, { "epoch": 0.11, "grad_norm": 0.30078125, "learning_rate": 4.998233814661712e-05, "loss": 2.3832, "step": 818 }, { "epoch": 0.11, "grad_norm": 0.298828125, "learning_rate": 4.998225850854854e-05, "loss": 2.3788, "step": 819 }, { "epoch": 0.11, "grad_norm": 0.294921875, "learning_rate": 4.998217869140158e-05, "loss": 2.36, "step": 820 }, { "epoch": 0.11, "grad_norm": 0.31640625, "learning_rate": 4.9982098695176804e-05, "loss": 2.3797, "step": 821 }, { "epoch": 0.11, "grad_norm": 0.296875, "learning_rate": 4.99820185198748e-05, "loss": 2.3786, "step": 822 }, { "epoch": 0.11, "grad_norm": 0.306640625, "learning_rate": 4.9981938165496124e-05, "loss": 2.3834, "step": 823 }, { "epoch": 0.11, "grad_norm": 0.296875, "learning_rate": 4.9981857632041375e-05, "loss": 2.3668, "step": 824 }, { "epoch": 0.11, "grad_norm": 0.3203125, "learning_rate": 4.99817769195111e-05, "loss": 2.3878, "step": 825 }, { "epoch": 0.11, "grad_norm": 0.310546875, "learning_rate": 4.998169602790591e-05, "loss": 2.3622, "step": 826 }, { "epoch": 0.11, "grad_norm": 0.294921875, "learning_rate": 4.9981614957226365e-05, "loss": 2.3718, "step": 827 }, { "epoch": 0.11, "grad_norm": 0.330078125, "learning_rate": 4.998153370747305e-05, "loss": 2.3546, "step": 828 }, { "epoch": 0.11, "grad_norm": 0.318359375, "learning_rate": 4.998145227864654e-05, "loss": 2.3396, "step": 829 }, { "epoch": 0.11, "grad_norm": 0.296875, "learning_rate": 4.998137067074744e-05, "loss": 2.3343, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.326171875, "learning_rate": 4.998128888377632e-05, "loss": 2.355, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.9981206917733756e-05, "loss": 2.3874, "step": 832 }, { "epoch": 0.11, "grad_norm": 0.3359375, "learning_rate": 4.9981124772620365e-05, "loss": 2.3401, "step": 833 }, { "epoch": 0.11, "grad_norm": 0.298828125, "learning_rate": 4.998104244843671e-05, "loss": 2.355, "step": 834 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.998095994518339e-05, "loss": 2.3393, "step": 835 }, { "epoch": 0.11, "grad_norm": 0.330078125, "learning_rate": 4.998087726286099e-05, "loss": 2.3565, "step": 836 }, { "epoch": 0.11, "grad_norm": 0.31640625, "learning_rate": 4.998079440147012e-05, "loss": 2.3718, "step": 837 }, { "epoch": 0.11, "grad_norm": 0.32421875, "learning_rate": 4.9980711361011355e-05, "loss": 2.3534, "step": 838 }, { "epoch": 0.11, "grad_norm": 0.3203125, "learning_rate": 4.99806281414853e-05, "loss": 2.3744, "step": 839 }, { "epoch": 0.11, "grad_norm": 0.291015625, "learning_rate": 4.998054474289256e-05, "loss": 2.3796, "step": 840 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.9980461165233706e-05, "loss": 2.352, "step": 841 }, { "epoch": 0.11, "grad_norm": 0.3125, "learning_rate": 4.998037740850936e-05, "loss": 2.3659, "step": 842 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.998029347272011e-05, "loss": 2.3628, "step": 843 }, { "epoch": 0.11, "grad_norm": 0.32421875, "learning_rate": 4.998020935786657e-05, "loss": 2.3522, "step": 844 }, { "epoch": 0.11, "grad_norm": 0.333984375, "learning_rate": 4.9980125063949335e-05, "loss": 2.3712, "step": 845 }, { "epoch": 0.11, "grad_norm": 0.318359375, "learning_rate": 4.9980040590969014e-05, "loss": 2.3704, "step": 846 }, { "epoch": 0.11, "grad_norm": 0.302734375, "learning_rate": 4.9979955938926206e-05, "loss": 2.3722, "step": 847 }, { "epoch": 0.11, "grad_norm": 0.314453125, "learning_rate": 4.997987110782152e-05, "loss": 2.3377, "step": 848 }, { "epoch": 0.11, "grad_norm": 0.28125, "learning_rate": 4.997978609765557e-05, "loss": 2.3537, "step": 849 }, { "epoch": 0.11, "grad_norm": 0.357421875, "learning_rate": 4.997970090842895e-05, "loss": 2.3595, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.29296875, "learning_rate": 4.997961554014229e-05, "loss": 2.3448, "step": 851 }, { "epoch": 0.11, "grad_norm": 0.3046875, "learning_rate": 4.9979529992796195e-05, "loss": 2.3628, "step": 852 }, { "epoch": 0.11, "grad_norm": 0.3125, "learning_rate": 4.9979444266391264e-05, "loss": 2.38, "step": 853 }, { "epoch": 0.11, "grad_norm": 0.306640625, "learning_rate": 4.997935836092813e-05, "loss": 2.3452, "step": 854 }, { "epoch": 0.11, "grad_norm": 0.31640625, "learning_rate": 4.9979272276407404e-05, "loss": 2.3323, "step": 855 }, { "epoch": 0.11, "grad_norm": 0.3046875, "learning_rate": 4.99791860128297e-05, "loss": 2.3691, "step": 856 }, { "epoch": 0.11, "grad_norm": 0.326171875, "learning_rate": 4.997909957019564e-05, "loss": 2.3312, "step": 857 }, { "epoch": 0.11, "grad_norm": 0.3125, "learning_rate": 4.997901294850585e-05, "loss": 2.3682, "step": 858 }, { "epoch": 0.11, "grad_norm": 0.296875, "learning_rate": 4.997892614776093e-05, "loss": 2.3405, "step": 859 }, { "epoch": 0.11, "grad_norm": 0.291015625, "learning_rate": 4.997883916796152e-05, "loss": 2.3393, "step": 860 }, { "epoch": 0.11, "grad_norm": 0.29296875, "learning_rate": 4.997875200910824e-05, "loss": 2.388, "step": 861 }, { "epoch": 0.11, "grad_norm": 0.322265625, "learning_rate": 4.9978664671201706e-05, "loss": 2.3753, "step": 862 }, { "epoch": 0.12, "grad_norm": 0.330078125, "learning_rate": 4.997857715424256e-05, "loss": 2.3012, "step": 863 }, { "epoch": 0.12, "grad_norm": 0.33203125, "learning_rate": 4.9978489458231413e-05, "loss": 2.3452, "step": 864 }, { "epoch": 0.12, "grad_norm": 0.3046875, "learning_rate": 4.99784015831689e-05, "loss": 2.3613, "step": 865 }, { "epoch": 0.12, "grad_norm": 0.29296875, "learning_rate": 4.997831352905566e-05, "loss": 2.3419, "step": 866 }, { "epoch": 0.12, "grad_norm": 0.318359375, "learning_rate": 4.997822529589231e-05, "loss": 2.3643, "step": 867 }, { "epoch": 0.12, "grad_norm": 0.32421875, "learning_rate": 4.9978136883679496e-05, "loss": 2.3741, "step": 868 }, { "epoch": 0.12, "grad_norm": 0.294921875, "learning_rate": 4.9978048292417836e-05, "loss": 2.3652, "step": 869 }, { "epoch": 0.12, "grad_norm": 0.3046875, "learning_rate": 4.9977959522107974e-05, "loss": 2.3393, "step": 870 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.997787057275055e-05, "loss": 2.3284, "step": 871 }, { "epoch": 0.12, "grad_norm": 0.2890625, "learning_rate": 4.9977781444346195e-05, "loss": 2.3533, "step": 872 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.9977692136895546e-05, "loss": 2.3438, "step": 873 }, { "epoch": 0.12, "grad_norm": 0.306640625, "learning_rate": 4.9977602650399256e-05, "loss": 2.3321, "step": 874 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.9977512984857954e-05, "loss": 2.3506, "step": 875 }, { "epoch": 0.12, "grad_norm": 0.30078125, "learning_rate": 4.9977423140272285e-05, "loss": 2.3642, "step": 876 }, { "epoch": 0.12, "grad_norm": 0.32421875, "learning_rate": 4.99773331166429e-05, "loss": 2.3684, "step": 877 }, { "epoch": 0.12, "grad_norm": 0.298828125, "learning_rate": 4.997724291397043e-05, "loss": 2.3465, "step": 878 }, { "epoch": 0.12, "grad_norm": 0.291015625, "learning_rate": 4.9977152532255536e-05, "loss": 2.3731, "step": 879 }, { "epoch": 0.12, "grad_norm": 0.314453125, "learning_rate": 4.997706197149886e-05, "loss": 2.3543, "step": 880 }, { "epoch": 0.12, "grad_norm": 0.3046875, "learning_rate": 4.997697123170105e-05, "loss": 2.3833, "step": 881 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.9976880312862764e-05, "loss": 2.3667, "step": 882 }, { "epoch": 0.12, "grad_norm": 0.337890625, "learning_rate": 4.997678921498464e-05, "loss": 2.3712, "step": 883 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.997669793806735e-05, "loss": 2.3681, "step": 884 }, { "epoch": 0.12, "grad_norm": 0.310546875, "learning_rate": 4.9976606482111524e-05, "loss": 2.3626, "step": 885 }, { "epoch": 0.12, "grad_norm": 0.306640625, "learning_rate": 4.997651484711784e-05, "loss": 2.3769, "step": 886 }, { "epoch": 0.12, "grad_norm": 0.294921875, "learning_rate": 4.997642303308694e-05, "loss": 2.3538, "step": 887 }, { "epoch": 0.12, "grad_norm": 0.314453125, "learning_rate": 4.9976331040019484e-05, "loss": 2.3571, "step": 888 }, { "epoch": 0.12, "grad_norm": 0.310546875, "learning_rate": 4.9976238867916146e-05, "loss": 2.359, "step": 889 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.997614651677757e-05, "loss": 2.3672, "step": 890 }, { "epoch": 0.12, "grad_norm": 0.3203125, "learning_rate": 4.997605398660442e-05, "loss": 2.3714, "step": 891 }, { "epoch": 0.12, "grad_norm": 0.326171875, "learning_rate": 4.997596127739737e-05, "loss": 2.3938, "step": 892 }, { "epoch": 0.12, "grad_norm": 0.31640625, "learning_rate": 4.997586838915708e-05, "loss": 2.3522, "step": 893 }, { "epoch": 0.12, "grad_norm": 0.3203125, "learning_rate": 4.9975775321884204e-05, "loss": 2.3123, "step": 894 }, { "epoch": 0.12, "grad_norm": 0.31640625, "learning_rate": 4.997568207557942e-05, "loss": 2.3664, "step": 895 }, { "epoch": 0.12, "grad_norm": 0.298828125, "learning_rate": 4.99755886502434e-05, "loss": 2.3324, "step": 896 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.9975495045876805e-05, "loss": 2.3435, "step": 897 }, { "epoch": 0.12, "grad_norm": 0.330078125, "learning_rate": 4.997540126248032e-05, "loss": 2.3407, "step": 898 }, { "epoch": 0.12, "grad_norm": 0.306640625, "learning_rate": 4.997530730005459e-05, "loss": 2.3627, "step": 899 }, { "epoch": 0.12, "grad_norm": 0.3203125, "learning_rate": 4.997521315860032e-05, "loss": 2.3423, "step": 900 }, { "epoch": 0.12, "grad_norm": 0.29296875, "learning_rate": 4.9975118838118154e-05, "loss": 2.365, "step": 901 }, { "epoch": 0.12, "grad_norm": 0.298828125, "learning_rate": 4.99750243386088e-05, "loss": 2.343, "step": 902 }, { "epoch": 0.12, "grad_norm": 0.302734375, "learning_rate": 4.997492966007291e-05, "loss": 2.3413, "step": 903 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.9974834802511175e-05, "loss": 2.3715, "step": 904 }, { "epoch": 0.12, "grad_norm": 0.310546875, "learning_rate": 4.997473976592427e-05, "loss": 2.3777, "step": 905 }, { "epoch": 0.12, "grad_norm": 0.3046875, "learning_rate": 4.997464455031289e-05, "loss": 2.339, "step": 906 }, { "epoch": 0.12, "grad_norm": 0.306640625, "learning_rate": 4.99745491556777e-05, "loss": 2.3268, "step": 907 }, { "epoch": 0.12, "grad_norm": 0.31640625, "learning_rate": 4.997445358201939e-05, "loss": 2.3353, "step": 908 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.9974357829338635e-05, "loss": 2.35, "step": 909 }, { "epoch": 0.12, "grad_norm": 0.29296875, "learning_rate": 4.9974261897636146e-05, "loss": 2.3242, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.296875, "learning_rate": 4.9974165786912586e-05, "loss": 2.3623, "step": 911 }, { "epoch": 0.12, "grad_norm": 0.294921875, "learning_rate": 4.997406949716866e-05, "loss": 2.3269, "step": 912 }, { "epoch": 0.12, "grad_norm": 0.32421875, "learning_rate": 4.997397302840505e-05, "loss": 2.3735, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.314453125, "learning_rate": 4.9973876380622455e-05, "loss": 2.3554, "step": 914 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.997377955382155e-05, "loss": 2.3587, "step": 915 }, { "epoch": 0.12, "grad_norm": 0.30859375, "learning_rate": 4.997368254800305e-05, "loss": 2.3336, "step": 916 }, { "epoch": 0.12, "grad_norm": 0.30078125, "learning_rate": 4.997358536316765e-05, "loss": 2.3111, "step": 917 }, { "epoch": 0.12, "grad_norm": 0.30859375, "learning_rate": 4.997348799931602e-05, "loss": 2.3308, "step": 918 }, { "epoch": 0.12, "grad_norm": 0.322265625, "learning_rate": 4.9973390456448895e-05, "loss": 2.3568, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.296875, "learning_rate": 4.9973292734566944e-05, "loss": 2.3282, "step": 920 }, { "epoch": 0.12, "grad_norm": 0.291015625, "learning_rate": 4.997319483367089e-05, "loss": 2.3654, "step": 921 }, { "epoch": 0.12, "grad_norm": 0.30078125, "learning_rate": 4.997309675376142e-05, "loss": 2.3522, "step": 922 }, { "epoch": 0.12, "grad_norm": 0.3046875, "learning_rate": 4.9972998494839235e-05, "loss": 2.3629, "step": 923 }, { "epoch": 0.12, "grad_norm": 0.30859375, "learning_rate": 4.9972900056905046e-05, "loss": 2.3586, "step": 924 }, { "epoch": 0.12, "grad_norm": 0.291015625, "learning_rate": 4.9972801439959565e-05, "loss": 2.3674, "step": 925 }, { "epoch": 0.12, "grad_norm": 0.30859375, "learning_rate": 4.997270264400349e-05, "loss": 2.3544, "step": 926 }, { "epoch": 0.12, "grad_norm": 0.29296875, "learning_rate": 4.997260366903753e-05, "loss": 2.382, "step": 927 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.997250451506239e-05, "loss": 2.393, "step": 928 }, { "epoch": 0.12, "grad_norm": 0.29296875, "learning_rate": 4.9972405182078795e-05, "loss": 2.3622, "step": 929 }, { "epoch": 0.12, "grad_norm": 0.3203125, "learning_rate": 4.997230567008745e-05, "loss": 2.3688, "step": 930 }, { "epoch": 0.12, "grad_norm": 0.287109375, "learning_rate": 4.997220597908906e-05, "loss": 2.3548, "step": 931 }, { "epoch": 0.12, "grad_norm": 0.3828125, "learning_rate": 4.997210610908435e-05, "loss": 2.3426, "step": 932 }, { "epoch": 0.12, "grad_norm": 0.306640625, "learning_rate": 4.997200606007403e-05, "loss": 2.3282, "step": 933 }, { "epoch": 0.12, "grad_norm": 0.3125, "learning_rate": 4.9971905832058816e-05, "loss": 2.3791, "step": 934 }, { "epoch": 0.12, "grad_norm": 0.318359375, "learning_rate": 4.9971805425039434e-05, "loss": 2.336, "step": 935 }, { "epoch": 0.12, "grad_norm": 0.2890625, "learning_rate": 4.99717048390166e-05, "loss": 2.3112, "step": 936 }, { "epoch": 0.12, "grad_norm": 0.298828125, "learning_rate": 4.997160407399104e-05, "loss": 2.3589, "step": 937 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.997150312996346e-05, "loss": 2.3759, "step": 938 }, { "epoch": 0.13, "grad_norm": 0.32421875, "learning_rate": 4.99714020069346e-05, "loss": 2.3366, "step": 939 }, { "epoch": 0.13, "grad_norm": 0.302734375, "learning_rate": 4.9971300704905176e-05, "loss": 2.3617, "step": 940 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.997119922387593e-05, "loss": 2.3538, "step": 941 }, { "epoch": 0.13, "grad_norm": 0.28125, "learning_rate": 4.997109756384757e-05, "loss": 2.3438, "step": 942 }, { "epoch": 0.13, "grad_norm": 0.314453125, "learning_rate": 4.9970995724820824e-05, "loss": 2.3423, "step": 943 }, { "epoch": 0.13, "grad_norm": 0.287109375, "learning_rate": 4.997089370679644e-05, "loss": 2.3984, "step": 944 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.997079150977513e-05, "loss": 2.3649, "step": 945 }, { "epoch": 0.13, "grad_norm": 0.298828125, "learning_rate": 4.9970689133757645e-05, "loss": 2.2982, "step": 946 }, { "epoch": 0.13, "grad_norm": 0.333984375, "learning_rate": 4.997058657874471e-05, "loss": 2.3959, "step": 947 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.9970483844737046e-05, "loss": 2.3428, "step": 948 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.997038093173542e-05, "loss": 2.3513, "step": 949 }, { "epoch": 0.13, "grad_norm": 0.279296875, "learning_rate": 4.9970277839740545e-05, "loss": 2.346, "step": 950 }, { "epoch": 0.13, "grad_norm": 0.3046875, "learning_rate": 4.997017456875316e-05, "loss": 2.3417, "step": 951 }, { "epoch": 0.13, "grad_norm": 0.28515625, "learning_rate": 4.997007111877402e-05, "loss": 2.3831, "step": 952 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.9969967489803857e-05, "loss": 2.3583, "step": 953 }, { "epoch": 0.13, "grad_norm": 0.30859375, "learning_rate": 4.9969863681843426e-05, "loss": 2.3699, "step": 954 }, { "epoch": 0.13, "grad_norm": 0.306640625, "learning_rate": 4.9969759694893455e-05, "loss": 2.3278, "step": 955 }, { "epoch": 0.13, "grad_norm": 0.291015625, "learning_rate": 4.9969655528954695e-05, "loss": 2.3535, "step": 956 }, { "epoch": 0.13, "grad_norm": 0.30859375, "learning_rate": 4.99695511840279e-05, "loss": 2.3616, "step": 957 }, { "epoch": 0.13, "grad_norm": 0.306640625, "learning_rate": 4.99694466601138e-05, "loss": 2.3174, "step": 958 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.996934195721316e-05, "loss": 2.3722, "step": 959 }, { "epoch": 0.13, "grad_norm": 0.310546875, "learning_rate": 4.996923707532673e-05, "loss": 2.3719, "step": 960 }, { "epoch": 0.13, "grad_norm": 0.302734375, "learning_rate": 4.996913201445526e-05, "loss": 2.3491, "step": 961 }, { "epoch": 0.13, "grad_norm": 0.326171875, "learning_rate": 4.9969026774599494e-05, "loss": 2.3544, "step": 962 }, { "epoch": 0.13, "grad_norm": 0.29296875, "learning_rate": 4.99689213557602e-05, "loss": 2.377, "step": 963 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.996881575793813e-05, "loss": 2.3516, "step": 964 }, { "epoch": 0.13, "grad_norm": 0.3125, "learning_rate": 4.9968709981134024e-05, "loss": 2.3465, "step": 965 }, { "epoch": 0.13, "grad_norm": 0.291015625, "learning_rate": 4.9968604025348666e-05, "loss": 2.3555, "step": 966 }, { "epoch": 0.13, "grad_norm": 0.30859375, "learning_rate": 4.99684978905828e-05, "loss": 2.3671, "step": 967 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.996839157683719e-05, "loss": 2.3565, "step": 968 }, { "epoch": 0.13, "grad_norm": 0.283203125, "learning_rate": 4.996828508411261e-05, "loss": 2.3551, "step": 969 }, { "epoch": 0.13, "grad_norm": 0.298828125, "learning_rate": 4.9968178412409805e-05, "loss": 2.3245, "step": 970 }, { "epoch": 0.13, "grad_norm": 0.3125, "learning_rate": 4.9968071561729544e-05, "loss": 2.3642, "step": 971 }, { "epoch": 0.13, "grad_norm": 0.298828125, "learning_rate": 4.99679645320726e-05, "loss": 2.3485, "step": 972 }, { "epoch": 0.13, "grad_norm": 0.2890625, "learning_rate": 4.996785732343973e-05, "loss": 2.3605, "step": 973 }, { "epoch": 0.13, "grad_norm": 0.306640625, "learning_rate": 4.996774993583171e-05, "loss": 2.3709, "step": 974 }, { "epoch": 0.13, "grad_norm": 0.31640625, "learning_rate": 4.996764236924931e-05, "loss": 2.3433, "step": 975 }, { "epoch": 0.13, "grad_norm": 0.294921875, "learning_rate": 4.99675346236933e-05, "loss": 2.3665, "step": 976 }, { "epoch": 0.13, "grad_norm": 0.31640625, "learning_rate": 4.996742669916446e-05, "loss": 2.3362, "step": 977 }, { "epoch": 0.13, "grad_norm": 0.310546875, "learning_rate": 4.9967318595663534e-05, "loss": 2.3576, "step": 978 }, { "epoch": 0.13, "grad_norm": 0.29296875, "learning_rate": 4.996721031319134e-05, "loss": 2.3287, "step": 979 }, { "epoch": 0.13, "grad_norm": 0.3359375, "learning_rate": 4.9967101851748624e-05, "loss": 2.3179, "step": 980 }, { "epoch": 0.13, "grad_norm": 0.28125, "learning_rate": 4.996699321133617e-05, "loss": 2.3295, "step": 981 }, { "epoch": 0.13, "grad_norm": 0.29296875, "learning_rate": 4.996688439195476e-05, "loss": 2.3353, "step": 982 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.996677539360517e-05, "loss": 2.3477, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.996666621628819e-05, "loss": 2.354, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.302734375, "learning_rate": 4.9966556860004596e-05, "loss": 2.3479, "step": 985 }, { "epoch": 0.13, "grad_norm": 0.31640625, "learning_rate": 4.996644732475517e-05, "loss": 2.3293, "step": 986 }, { "epoch": 0.13, "grad_norm": 0.322265625, "learning_rate": 4.9966337610540695e-05, "loss": 2.3224, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.322265625, "learning_rate": 4.996622771736197e-05, "loss": 2.3399, "step": 988 }, { "epoch": 0.13, "grad_norm": 0.3046875, "learning_rate": 4.9966117645219774e-05, "loss": 2.3572, "step": 989 }, { "epoch": 0.13, "grad_norm": 0.30859375, "learning_rate": 4.99660073941149e-05, "loss": 2.3564, "step": 990 }, { "epoch": 0.13, "grad_norm": 0.302734375, "learning_rate": 4.996589696404813e-05, "loss": 2.3242, "step": 991 }, { "epoch": 0.13, "grad_norm": 0.322265625, "learning_rate": 4.996578635502026e-05, "loss": 2.3513, "step": 992 }, { "epoch": 0.13, "grad_norm": 0.318359375, "learning_rate": 4.996567556703208e-05, "loss": 2.3536, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.291015625, "learning_rate": 4.996556460008439e-05, "loss": 2.3786, "step": 994 }, { "epoch": 0.13, "grad_norm": 0.3359375, "learning_rate": 4.996545345417799e-05, "loss": 2.309, "step": 995 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.996534212931366e-05, "loss": 2.3516, "step": 996 }, { "epoch": 0.13, "grad_norm": 0.298828125, "learning_rate": 4.996523062549222e-05, "loss": 2.3105, "step": 997 }, { "epoch": 0.13, "grad_norm": 0.32421875, "learning_rate": 4.996511894271445e-05, "loss": 2.3948, "step": 998 }, { "epoch": 0.13, "grad_norm": 0.287109375, "learning_rate": 4.996500708098116e-05, "loss": 2.3323, "step": 999 }, { "epoch": 0.13, "grad_norm": 0.302734375, "learning_rate": 4.9964895040293154e-05, "loss": 2.3264, "step": 1000 }, { "epoch": 0.13, "eval_loss": 2.3436667919158936, "eval_runtime": 616.9061, "eval_samples_per_second": 62.847, "eval_steps_per_second": 7.857, "step": 1000 }, { "epoch": 0.13, "grad_norm": 0.306640625, "learning_rate": 4.996478282065122e-05, "loss": 2.3661, "step": 1001 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.996467042205618e-05, "loss": 2.3248, "step": 1002 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.996455784450884e-05, "loss": 2.3429, "step": 1003 }, { "epoch": 0.13, "grad_norm": 0.3125, "learning_rate": 4.9964445088009985e-05, "loss": 2.3498, "step": 1004 }, { "epoch": 0.13, "grad_norm": 0.326171875, "learning_rate": 4.9964332152560445e-05, "loss": 2.3401, "step": 1005 }, { "epoch": 0.13, "grad_norm": 0.318359375, "learning_rate": 4.9964219038161034e-05, "loss": 2.3635, "step": 1006 }, { "epoch": 0.13, "grad_norm": 0.30078125, "learning_rate": 4.9964105744812535e-05, "loss": 2.3819, "step": 1007 }, { "epoch": 0.13, "grad_norm": 0.310546875, "learning_rate": 4.996399227251578e-05, "loss": 2.3693, "step": 1008 }, { "epoch": 0.13, "grad_norm": 0.296875, "learning_rate": 4.9963878621271586e-05, "loss": 2.3661, "step": 1009 }, { "epoch": 0.13, "grad_norm": 0.326171875, "learning_rate": 4.9963764791080755e-05, "loss": 2.3366, "step": 1010 }, { "epoch": 0.13, "grad_norm": 0.333984375, "learning_rate": 4.996365078194411e-05, "loss": 2.3567, "step": 1011 }, { "epoch": 0.13, "grad_norm": 0.3125, "learning_rate": 4.996353659386247e-05, "loss": 2.3598, "step": 1012 }, { "epoch": 0.14, "grad_norm": 0.294921875, "learning_rate": 4.996342222683664e-05, "loss": 2.3869, "step": 1013 }, { "epoch": 0.14, "grad_norm": 0.318359375, "learning_rate": 4.996330768086747e-05, "loss": 2.3356, "step": 1014 }, { "epoch": 0.14, "grad_norm": 0.333984375, "learning_rate": 4.996319295595574e-05, "loss": 2.3417, "step": 1015 }, { "epoch": 0.14, "grad_norm": 0.3203125, "learning_rate": 4.996307805210231e-05, "loss": 2.3414, "step": 1016 }, { "epoch": 0.14, "grad_norm": 0.32421875, "learning_rate": 4.996296296930797e-05, "loss": 2.349, "step": 1017 }, { "epoch": 0.14, "grad_norm": 0.283203125, "learning_rate": 4.996284770757358e-05, "loss": 2.3435, "step": 1018 }, { "epoch": 0.14, "grad_norm": 0.3203125, "learning_rate": 4.9962732266899935e-05, "loss": 2.3592, "step": 1019 }, { "epoch": 0.14, "grad_norm": 0.314453125, "learning_rate": 4.9962616647287886e-05, "loss": 2.3701, "step": 1020 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.996250084873825e-05, "loss": 2.3324, "step": 1021 }, { "epoch": 0.14, "grad_norm": 0.29296875, "learning_rate": 4.996238487125185e-05, "loss": 2.3267, "step": 1022 }, { "epoch": 0.14, "grad_norm": 0.30078125, "learning_rate": 4.996226871482954e-05, "loss": 2.362, "step": 1023 }, { "epoch": 0.14, "grad_norm": 0.333984375, "learning_rate": 4.996215237947214e-05, "loss": 2.3619, "step": 1024 }, { "epoch": 0.14, "grad_norm": 0.302734375, "learning_rate": 4.996203586518047e-05, "loss": 2.3574, "step": 1025 }, { "epoch": 0.14, "grad_norm": 0.31640625, "learning_rate": 4.9961919171955385e-05, "loss": 2.3458, "step": 1026 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.996180229979771e-05, "loss": 2.3343, "step": 1027 }, { "epoch": 0.14, "grad_norm": 0.298828125, "learning_rate": 4.996168524870829e-05, "loss": 2.3575, "step": 1028 }, { "epoch": 0.14, "grad_norm": 0.322265625, "learning_rate": 4.9961568018687965e-05, "loss": 2.3541, "step": 1029 }, { "epoch": 0.14, "grad_norm": 0.28125, "learning_rate": 4.996145060973757e-05, "loss": 2.3551, "step": 1030 }, { "epoch": 0.14, "grad_norm": 0.298828125, "learning_rate": 4.9961333021857945e-05, "loss": 2.3486, "step": 1031 }, { "epoch": 0.14, "grad_norm": 0.326171875, "learning_rate": 4.996121525504994e-05, "loss": 2.3332, "step": 1032 }, { "epoch": 0.14, "grad_norm": 0.3046875, "learning_rate": 4.996109730931439e-05, "loss": 2.3174, "step": 1033 }, { "epoch": 0.14, "grad_norm": 0.291015625, "learning_rate": 4.996097918465215e-05, "loss": 2.3615, "step": 1034 }, { "epoch": 0.14, "grad_norm": 0.3046875, "learning_rate": 4.996086088106406e-05, "loss": 2.3543, "step": 1035 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.9960742398550976e-05, "loss": 2.3298, "step": 1036 }, { "epoch": 0.14, "grad_norm": 0.30078125, "learning_rate": 4.9960623737113746e-05, "loss": 2.3635, "step": 1037 }, { "epoch": 0.14, "grad_norm": 0.328125, "learning_rate": 4.99605048967532e-05, "loss": 2.3319, "step": 1038 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.996038587747022e-05, "loss": 2.323, "step": 1039 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.996026667926564e-05, "loss": 2.3483, "step": 1040 }, { "epoch": 0.14, "grad_norm": 0.30078125, "learning_rate": 4.996014730214032e-05, "loss": 2.3392, "step": 1041 }, { "epoch": 0.14, "grad_norm": 0.302734375, "learning_rate": 4.996002774609512e-05, "loss": 2.3544, "step": 1042 }, { "epoch": 0.14, "grad_norm": 0.330078125, "learning_rate": 4.9959908011130885e-05, "loss": 2.3248, "step": 1043 }, { "epoch": 0.14, "grad_norm": 0.3125, "learning_rate": 4.9959788097248486e-05, "loss": 2.3493, "step": 1044 }, { "epoch": 0.14, "grad_norm": 0.3125, "learning_rate": 4.995966800444878e-05, "loss": 2.3353, "step": 1045 }, { "epoch": 0.14, "grad_norm": 0.337890625, "learning_rate": 4.995954773273262e-05, "loss": 2.3354, "step": 1046 }, { "epoch": 0.14, "grad_norm": 0.3125, "learning_rate": 4.9959427282100876e-05, "loss": 2.3477, "step": 1047 }, { "epoch": 0.14, "grad_norm": 0.298828125, "learning_rate": 4.9959306652554404e-05, "loss": 2.3402, "step": 1048 }, { "epoch": 0.14, "grad_norm": 0.302734375, "learning_rate": 4.995918584409408e-05, "loss": 2.3419, "step": 1049 }, { "epoch": 0.14, "grad_norm": 0.29296875, "learning_rate": 4.9959064856720764e-05, "loss": 2.3235, "step": 1050 }, { "epoch": 0.14, "grad_norm": 0.322265625, "learning_rate": 4.9958943690435324e-05, "loss": 2.3243, "step": 1051 }, { "epoch": 0.14, "grad_norm": 0.283203125, "learning_rate": 4.995882234523862e-05, "loss": 2.3107, "step": 1052 }, { "epoch": 0.14, "grad_norm": 0.29296875, "learning_rate": 4.995870082113154e-05, "loss": 2.352, "step": 1053 }, { "epoch": 0.14, "grad_norm": 0.302734375, "learning_rate": 4.995857911811493e-05, "loss": 2.3441, "step": 1054 }, { "epoch": 0.14, "grad_norm": 0.28125, "learning_rate": 4.995845723618969e-05, "loss": 2.3151, "step": 1055 }, { "epoch": 0.14, "grad_norm": 0.306640625, "learning_rate": 4.995833517535667e-05, "loss": 2.3372, "step": 1056 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.995821293561677e-05, "loss": 2.3446, "step": 1057 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.995809051697084e-05, "loss": 2.3358, "step": 1058 }, { "epoch": 0.14, "grad_norm": 0.30078125, "learning_rate": 4.995796791941977e-05, "loss": 2.3511, "step": 1059 }, { "epoch": 0.14, "grad_norm": 0.31640625, "learning_rate": 4.9957845142964445e-05, "loss": 2.3602, "step": 1060 }, { "epoch": 0.14, "grad_norm": 0.3046875, "learning_rate": 4.995772218760573e-05, "loss": 2.3424, "step": 1061 }, { "epoch": 0.14, "grad_norm": 0.2890625, "learning_rate": 4.995759905334452e-05, "loss": 2.3445, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.298828125, "learning_rate": 4.995747574018169e-05, "loss": 2.3809, "step": 1063 }, { "epoch": 0.14, "grad_norm": 0.322265625, "learning_rate": 4.995735224811812e-05, "loss": 2.3654, "step": 1064 }, { "epoch": 0.14, "grad_norm": 0.296875, "learning_rate": 4.9957228577154704e-05, "loss": 2.3233, "step": 1065 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.995710472729234e-05, "loss": 2.316, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.302734375, "learning_rate": 4.9956980698531885e-05, "loss": 2.3453, "step": 1067 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.995685649087425e-05, "loss": 2.3207, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.306640625, "learning_rate": 4.995673210432032e-05, "loss": 2.334, "step": 1069 }, { "epoch": 0.14, "grad_norm": 0.3203125, "learning_rate": 4.995660753887098e-05, "loss": 2.3326, "step": 1070 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.9956482794527136e-05, "loss": 2.3492, "step": 1071 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.9956357871289685e-05, "loss": 2.3558, "step": 1072 }, { "epoch": 0.14, "grad_norm": 0.3125, "learning_rate": 4.995623276915949e-05, "loss": 2.3363, "step": 1073 }, { "epoch": 0.14, "grad_norm": 0.2890625, "learning_rate": 4.995610748813749e-05, "loss": 2.297, "step": 1074 }, { "epoch": 0.14, "grad_norm": 0.296875, "learning_rate": 4.995598202822456e-05, "loss": 2.3213, "step": 1075 }, { "epoch": 0.14, "grad_norm": 0.318359375, "learning_rate": 4.995585638942161e-05, "loss": 2.3106, "step": 1076 }, { "epoch": 0.14, "grad_norm": 0.310546875, "learning_rate": 4.995573057172952e-05, "loss": 2.3369, "step": 1077 }, { "epoch": 0.14, "grad_norm": 0.306640625, "learning_rate": 4.995560457514921e-05, "loss": 2.3401, "step": 1078 }, { "epoch": 0.14, "grad_norm": 0.31640625, "learning_rate": 4.995547839968158e-05, "loss": 2.3354, "step": 1079 }, { "epoch": 0.14, "grad_norm": 0.279296875, "learning_rate": 4.995535204532753e-05, "loss": 2.3237, "step": 1080 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.995522551208798e-05, "loss": 2.3366, "step": 1081 }, { "epoch": 0.14, "grad_norm": 0.326171875, "learning_rate": 4.9955098799963814e-05, "loss": 2.3389, "step": 1082 }, { "epoch": 0.14, "grad_norm": 0.326171875, "learning_rate": 4.995497190895596e-05, "loss": 2.3372, "step": 1083 }, { "epoch": 0.14, "grad_norm": 0.29296875, "learning_rate": 4.995484483906532e-05, "loss": 2.3465, "step": 1084 }, { "epoch": 0.14, "grad_norm": 0.296875, "learning_rate": 4.995471759029281e-05, "loss": 2.3314, "step": 1085 }, { "epoch": 0.14, "grad_norm": 0.326171875, "learning_rate": 4.995459016263933e-05, "loss": 2.3602, "step": 1086 }, { "epoch": 0.14, "grad_norm": 0.30859375, "learning_rate": 4.9954462556105794e-05, "loss": 2.3619, "step": 1087 }, { "epoch": 0.15, "grad_norm": 0.318359375, "learning_rate": 4.995433477069314e-05, "loss": 2.3374, "step": 1088 }, { "epoch": 0.15, "grad_norm": 0.322265625, "learning_rate": 4.995420680640225e-05, "loss": 2.3353, "step": 1089 }, { "epoch": 0.15, "grad_norm": 0.30859375, "learning_rate": 4.995407866323406e-05, "loss": 2.322, "step": 1090 }, { "epoch": 0.15, "grad_norm": 0.306640625, "learning_rate": 4.995395034118949e-05, "loss": 2.3656, "step": 1091 }, { "epoch": 0.15, "grad_norm": 0.302734375, "learning_rate": 4.9953821840269464e-05, "loss": 2.3238, "step": 1092 }, { "epoch": 0.15, "grad_norm": 0.287109375, "learning_rate": 4.995369316047489e-05, "loss": 2.3232, "step": 1093 }, { "epoch": 0.15, "grad_norm": 0.298828125, "learning_rate": 4.99535643018067e-05, "loss": 2.3567, "step": 1094 }, { "epoch": 0.15, "grad_norm": 0.302734375, "learning_rate": 4.9953435264265814e-05, "loss": 2.3605, "step": 1095 }, { "epoch": 0.15, "grad_norm": 0.2890625, "learning_rate": 4.995330604785315e-05, "loss": 2.3435, "step": 1096 }, { "epoch": 0.15, "grad_norm": 0.29296875, "learning_rate": 4.995317665256965e-05, "loss": 2.3186, "step": 1097 }, { "epoch": 0.15, "grad_norm": 0.306640625, "learning_rate": 4.9953047078416226e-05, "loss": 2.3136, "step": 1098 }, { "epoch": 0.15, "grad_norm": 0.328125, "learning_rate": 4.995291732539382e-05, "loss": 2.3561, "step": 1099 }, { "epoch": 0.15, "grad_norm": 0.3125, "learning_rate": 4.995278739350335e-05, "loss": 2.3443, "step": 1100 }, { "epoch": 0.15, "grad_norm": 0.2890625, "learning_rate": 4.995265728274576e-05, "loss": 2.3515, "step": 1101 }, { "epoch": 0.15, "grad_norm": 0.2890625, "learning_rate": 4.995252699312197e-05, "loss": 2.3441, "step": 1102 }, { "epoch": 0.15, "grad_norm": 0.291015625, "learning_rate": 4.9952396524632924e-05, "loss": 2.2965, "step": 1103 }, { "epoch": 0.15, "grad_norm": 0.314453125, "learning_rate": 4.995226587727955e-05, "loss": 2.3472, "step": 1104 }, { "epoch": 0.15, "grad_norm": 0.314453125, "learning_rate": 4.9952135051062786e-05, "loss": 2.3584, "step": 1105 }, { "epoch": 0.15, "grad_norm": 0.306640625, "learning_rate": 4.995200404598357e-05, "loss": 2.314, "step": 1106 }, { "epoch": 0.15, "grad_norm": 0.32421875, "learning_rate": 4.995187286204285e-05, "loss": 2.3441, "step": 1107 }, { "epoch": 0.15, "grad_norm": 0.30859375, "learning_rate": 4.9951741499241555e-05, "loss": 2.3177, "step": 1108 }, { "epoch": 0.15, "grad_norm": 0.302734375, "learning_rate": 4.995160995758063e-05, "loss": 2.3245, "step": 1109 }, { "epoch": 0.15, "grad_norm": 0.3359375, "learning_rate": 4.995147823706102e-05, "loss": 2.3547, "step": 1110 }, { "epoch": 0.15, "grad_norm": 0.333984375, "learning_rate": 4.995134633768366e-05, "loss": 2.3162, "step": 1111 }, { "epoch": 0.15, "grad_norm": 0.30859375, "learning_rate": 4.9951214259449514e-05, "loss": 2.3255, "step": 1112 }, { "epoch": 0.15, "grad_norm": 0.310546875, "learning_rate": 4.995108200235951e-05, "loss": 2.358, "step": 1113 }, { "epoch": 0.15, "grad_norm": 0.318359375, "learning_rate": 4.995094956641461e-05, "loss": 2.3097, "step": 1114 }, { "epoch": 0.15, "grad_norm": 0.296875, "learning_rate": 4.995081695161575e-05, "loss": 2.3036, "step": 1115 }, { "epoch": 0.15, "grad_norm": 0.3125, "learning_rate": 4.995068415796389e-05, "loss": 2.3476, "step": 1116 }, { "epoch": 0.15, "grad_norm": 0.326171875, "learning_rate": 4.995055118545998e-05, "loss": 2.3388, "step": 1117 }, { "epoch": 0.15, "grad_norm": 0.30859375, "learning_rate": 4.995041803410497e-05, "loss": 2.3782, "step": 1118 }, { "epoch": 0.15, "grad_norm": 0.3125, "learning_rate": 4.995028470389983e-05, "loss": 2.323, "step": 1119 }, { "epoch": 0.15, "grad_norm": 0.322265625, "learning_rate": 4.995015119484549e-05, "loss": 2.3507, "step": 1120 }, { "epoch": 0.15, "grad_norm": 0.310546875, "learning_rate": 4.9950017506942925e-05, "loss": 2.3488, "step": 1121 }, { "epoch": 0.15, "grad_norm": 0.318359375, "learning_rate": 4.9949883640193086e-05, "loss": 2.3218, "step": 1122 }, { "epoch": 0.15, "grad_norm": 0.298828125, "learning_rate": 4.994974959459694e-05, "loss": 2.3373, "step": 1123 }, { "epoch": 0.15, "grad_norm": 0.322265625, "learning_rate": 4.994961537015543e-05, "loss": 2.3187, "step": 1124 }, { "epoch": 0.15, "grad_norm": 0.30078125, "learning_rate": 4.994948096686954e-05, "loss": 2.2965, "step": 1125 }, { "epoch": 0.15, "grad_norm": 0.29296875, "learning_rate": 4.9949346384740226e-05, "loss": 2.3075, "step": 1126 }, { "epoch": 0.15, "grad_norm": 0.28515625, "learning_rate": 4.994921162376845e-05, "loss": 2.3455, "step": 1127 }, { "epoch": 0.15, "grad_norm": 0.29296875, "learning_rate": 4.994907668395518e-05, "loss": 2.3389, "step": 1128 }, { "epoch": 0.15, "grad_norm": 0.2890625, "learning_rate": 4.994894156530138e-05, "loss": 2.3669, "step": 1129 }, { "epoch": 0.15, "grad_norm": 0.3046875, "learning_rate": 4.994880626780802e-05, "loss": 2.3136, "step": 1130 }, { "epoch": 0.15, "grad_norm": 0.31640625, "learning_rate": 4.994867079147607e-05, "loss": 2.3296, "step": 1131 }, { "epoch": 0.15, "grad_norm": 0.314453125, "learning_rate": 4.994853513630651e-05, "loss": 2.3615, "step": 1132 }, { "epoch": 0.15, "grad_norm": 0.291015625, "learning_rate": 4.9948399302300294e-05, "loss": 2.3376, "step": 1133 }, { "epoch": 0.15, "grad_norm": 0.294921875, "learning_rate": 4.994826328945841e-05, "loss": 2.3372, "step": 1134 }, { "epoch": 0.15, "grad_norm": 0.3046875, "learning_rate": 4.9948127097781827e-05, "loss": 2.3205, "step": 1135 }, { "epoch": 0.15, "grad_norm": 0.298828125, "learning_rate": 4.994799072727153e-05, "loss": 2.2967, "step": 1136 }, { "epoch": 0.15, "grad_norm": 0.310546875, "learning_rate": 4.994785417792849e-05, "loss": 2.3224, "step": 1137 }, { "epoch": 0.15, "grad_norm": 0.314453125, "learning_rate": 4.9947717449753674e-05, "loss": 2.3355, "step": 1138 }, { "epoch": 0.15, "grad_norm": 0.291015625, "learning_rate": 4.9947580542748085e-05, "loss": 2.3185, "step": 1139 }, { "epoch": 0.15, "grad_norm": 0.302734375, "learning_rate": 4.9947443456912685e-05, "loss": 2.3366, "step": 1140 }, { "epoch": 0.15, "grad_norm": 0.3046875, "learning_rate": 4.9947306192248464e-05, "loss": 2.2921, "step": 1141 }, { "epoch": 0.15, "grad_norm": 0.3359375, "learning_rate": 4.994716874875641e-05, "loss": 2.3509, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.326171875, "learning_rate": 4.994703112643751e-05, "loss": 2.3174, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.306640625, "learning_rate": 4.9946893325292746e-05, "loss": 2.3436, "step": 1144 }, { "epoch": 0.15, "grad_norm": 0.341796875, "learning_rate": 4.994675534532309e-05, "loss": 2.3181, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.29296875, "learning_rate": 4.9946617186529564e-05, "loss": 2.3178, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.3125, "learning_rate": 4.994647884891313e-05, "loss": 2.3218, "step": 1147 }, { "epoch": 0.15, "grad_norm": 0.322265625, "learning_rate": 4.994634033247479e-05, "loss": 2.317, "step": 1148 }, { "epoch": 0.15, "grad_norm": 0.3125, "learning_rate": 4.994620163721554e-05, "loss": 2.3097, "step": 1149 }, { "epoch": 0.15, "grad_norm": 0.328125, "learning_rate": 4.994606276313638e-05, "loss": 2.3584, "step": 1150 }, { "epoch": 0.15, "grad_norm": 0.287109375, "learning_rate": 4.994592371023829e-05, "loss": 2.334, "step": 1151 }, { "epoch": 0.15, "grad_norm": 0.306640625, "learning_rate": 4.9945784478522266e-05, "loss": 2.3375, "step": 1152 }, { "epoch": 0.15, "grad_norm": 0.294921875, "learning_rate": 4.9945645067989324e-05, "loss": 2.3292, "step": 1153 }, { "epoch": 0.15, "grad_norm": 0.380859375, "learning_rate": 4.994550547864044e-05, "loss": 2.3535, "step": 1154 }, { "epoch": 0.15, "grad_norm": 0.32421875, "learning_rate": 4.994536571047664e-05, "loss": 2.3015, "step": 1155 }, { "epoch": 0.15, "grad_norm": 0.30859375, "learning_rate": 4.9945225763498916e-05, "loss": 2.3333, "step": 1156 }, { "epoch": 0.15, "grad_norm": 0.3203125, "learning_rate": 4.994508563770825e-05, "loss": 2.3254, "step": 1157 }, { "epoch": 0.15, "grad_norm": 0.291015625, "learning_rate": 4.994494533310569e-05, "loss": 2.3449, "step": 1158 }, { "epoch": 0.15, "grad_norm": 0.30078125, "learning_rate": 4.99448048496922e-05, "loss": 2.3253, "step": 1159 }, { "epoch": 0.15, "grad_norm": 0.296875, "learning_rate": 4.9944664187468806e-05, "loss": 2.3617, "step": 1160 }, { "epoch": 0.15, "grad_norm": 0.29296875, "learning_rate": 4.9944523346436515e-05, "loss": 2.3255, "step": 1161 }, { "epoch": 0.16, "grad_norm": 0.2890625, "learning_rate": 4.994438232659634e-05, "loss": 2.3648, "step": 1162 }, { "epoch": 0.16, "grad_norm": 0.306640625, "learning_rate": 4.994424112794928e-05, "loss": 2.3261, "step": 1163 }, { "epoch": 0.16, "grad_norm": 0.283203125, "learning_rate": 4.9944099750496364e-05, "loss": 2.304, "step": 1164 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.994395819423859e-05, "loss": 2.3212, "step": 1165 }, { "epoch": 0.16, "grad_norm": 1.3125, "learning_rate": 4.994381645917698e-05, "loss": 2.3326, "step": 1166 }, { "epoch": 0.16, "grad_norm": 0.322265625, "learning_rate": 4.9943674545312544e-05, "loss": 2.3864, "step": 1167 }, { "epoch": 0.16, "grad_norm": 0.333984375, "learning_rate": 4.994353245264631e-05, "loss": 2.3304, "step": 1168 }, { "epoch": 0.16, "grad_norm": 0.349609375, "learning_rate": 4.99433901811793e-05, "loss": 2.3235, "step": 1169 }, { "epoch": 0.16, "grad_norm": 0.32421875, "learning_rate": 4.9943247730912504e-05, "loss": 2.3562, "step": 1170 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.994310510184698e-05, "loss": 2.3092, "step": 1171 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.994296229398372e-05, "loss": 2.3173, "step": 1172 }, { "epoch": 0.16, "grad_norm": 0.328125, "learning_rate": 4.994281930732377e-05, "loss": 2.3437, "step": 1173 }, { "epoch": 0.16, "grad_norm": 0.326171875, "learning_rate": 4.994267614186815e-05, "loss": 2.3557, "step": 1174 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.994253279761788e-05, "loss": 2.3234, "step": 1175 }, { "epoch": 0.16, "grad_norm": 0.31640625, "learning_rate": 4.9942389274573985e-05, "loss": 2.3283, "step": 1176 }, { "epoch": 0.16, "grad_norm": 0.333984375, "learning_rate": 4.99422455727375e-05, "loss": 2.3226, "step": 1177 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.994210169210946e-05, "loss": 2.3256, "step": 1178 }, { "epoch": 0.16, "grad_norm": 0.30078125, "learning_rate": 4.994195763269089e-05, "loss": 2.3287, "step": 1179 }, { "epoch": 0.16, "grad_norm": 0.32421875, "learning_rate": 4.994181339448282e-05, "loss": 2.3475, "step": 1180 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.994166897748629e-05, "loss": 2.3278, "step": 1181 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.9941524381702335e-05, "loss": 2.3654, "step": 1182 }, { "epoch": 0.16, "grad_norm": 0.294921875, "learning_rate": 4.994137960713198e-05, "loss": 2.3061, "step": 1183 }, { "epoch": 0.16, "grad_norm": 0.310546875, "learning_rate": 4.994123465377628e-05, "loss": 2.3289, "step": 1184 }, { "epoch": 0.16, "grad_norm": 0.3125, "learning_rate": 4.994108952163626e-05, "loss": 2.3443, "step": 1185 }, { "epoch": 0.16, "grad_norm": 0.296875, "learning_rate": 4.994094421071297e-05, "loss": 2.3483, "step": 1186 }, { "epoch": 0.16, "grad_norm": 0.287109375, "learning_rate": 4.9940798721007444e-05, "loss": 2.3414, "step": 1187 }, { "epoch": 0.16, "grad_norm": 0.294921875, "learning_rate": 4.994065305252073e-05, "loss": 2.3387, "step": 1188 }, { "epoch": 0.16, "grad_norm": 0.3125, "learning_rate": 4.994050720525387e-05, "loss": 2.3005, "step": 1189 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.994036117920792e-05, "loss": 2.348, "step": 1190 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.99402149743839e-05, "loss": 2.3302, "step": 1191 }, { "epoch": 0.16, "grad_norm": 0.2890625, "learning_rate": 4.994006859078289e-05, "loss": 2.3243, "step": 1192 }, { "epoch": 0.16, "grad_norm": 0.337890625, "learning_rate": 4.9939922028405914e-05, "loss": 2.3496, "step": 1193 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.9939775287254044e-05, "loss": 2.3568, "step": 1194 }, { "epoch": 0.16, "grad_norm": 0.30859375, "learning_rate": 4.9939628367328315e-05, "loss": 2.2948, "step": 1195 }, { "epoch": 0.16, "grad_norm": 0.310546875, "learning_rate": 4.9939481268629784e-05, "loss": 2.3569, "step": 1196 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.993933399115951e-05, "loss": 2.347, "step": 1197 }, { "epoch": 0.16, "grad_norm": 0.2890625, "learning_rate": 4.9939186534918545e-05, "loss": 2.3136, "step": 1198 }, { "epoch": 0.16, "grad_norm": 0.30078125, "learning_rate": 4.9939038899907944e-05, "loss": 2.3327, "step": 1199 }, { "epoch": 0.16, "grad_norm": 0.314453125, "learning_rate": 4.993889108612877e-05, "loss": 2.3643, "step": 1200 }, { "epoch": 0.16, "grad_norm": 0.31640625, "learning_rate": 4.9938743093582085e-05, "loss": 2.3334, "step": 1201 }, { "epoch": 0.16, "grad_norm": 0.294921875, "learning_rate": 4.993859492226894e-05, "loss": 2.3495, "step": 1202 }, { "epoch": 0.16, "grad_norm": 0.291015625, "learning_rate": 4.993844657219041e-05, "loss": 2.319, "step": 1203 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.9938298043347545e-05, "loss": 2.3327, "step": 1204 }, { "epoch": 0.16, "grad_norm": 0.291015625, "learning_rate": 4.993814933574142e-05, "loss": 2.3032, "step": 1205 }, { "epoch": 0.16, "grad_norm": 0.3203125, "learning_rate": 4.993800044937309e-05, "loss": 2.3428, "step": 1206 }, { "epoch": 0.16, "grad_norm": 0.30859375, "learning_rate": 4.9937851384243636e-05, "loss": 2.3739, "step": 1207 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.9937702140354125e-05, "loss": 2.3206, "step": 1208 }, { "epoch": 0.16, "grad_norm": 0.2890625, "learning_rate": 4.9937552717705615e-05, "loss": 2.3384, "step": 1209 }, { "epoch": 0.16, "grad_norm": 0.29296875, "learning_rate": 4.993740311629918e-05, "loss": 2.3136, "step": 1210 }, { "epoch": 0.16, "grad_norm": 0.306640625, "learning_rate": 4.99372533361359e-05, "loss": 2.3568, "step": 1211 }, { "epoch": 0.16, "grad_norm": 0.294921875, "learning_rate": 4.993710337721684e-05, "loss": 2.3306, "step": 1212 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.99369532395431e-05, "loss": 2.2945, "step": 1213 }, { "epoch": 0.16, "grad_norm": 0.27734375, "learning_rate": 4.993680292311571e-05, "loss": 2.3765, "step": 1214 }, { "epoch": 0.16, "grad_norm": 0.28125, "learning_rate": 4.9936652427935785e-05, "loss": 2.3355, "step": 1215 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.99365017540044e-05, "loss": 2.3546, "step": 1216 }, { "epoch": 0.16, "grad_norm": 0.287109375, "learning_rate": 4.993635090132261e-05, "loss": 2.3167, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.993619986989152e-05, "loss": 2.3566, "step": 1218 }, { "epoch": 0.16, "grad_norm": 0.28125, "learning_rate": 4.993604865971221e-05, "loss": 2.3489, "step": 1219 }, { "epoch": 0.16, "grad_norm": 0.294921875, "learning_rate": 4.9935897270785756e-05, "loss": 2.3026, "step": 1220 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.9935745703113254e-05, "loss": 2.3186, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.993559395669577e-05, "loss": 2.2895, "step": 1222 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.993544203153442e-05, "loss": 2.3362, "step": 1223 }, { "epoch": 0.16, "grad_norm": 0.310546875, "learning_rate": 4.993528992763027e-05, "loss": 2.3291, "step": 1224 }, { "epoch": 0.16, "grad_norm": 0.3046875, "learning_rate": 4.993513764498443e-05, "loss": 2.3247, "step": 1225 }, { "epoch": 0.16, "grad_norm": 0.314453125, "learning_rate": 4.993498518359797e-05, "loss": 2.3055, "step": 1226 }, { "epoch": 0.16, "grad_norm": 0.306640625, "learning_rate": 4.9934832543472e-05, "loss": 2.3304, "step": 1227 }, { "epoch": 0.16, "grad_norm": 0.28125, "learning_rate": 4.99346797246076e-05, "loss": 2.3342, "step": 1228 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.993452672700588e-05, "loss": 2.3142, "step": 1229 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.9934373550667924e-05, "loss": 2.362, "step": 1230 }, { "epoch": 0.16, "grad_norm": 0.306640625, "learning_rate": 4.993422019559484e-05, "loss": 2.3348, "step": 1231 }, { "epoch": 0.16, "grad_norm": 0.302734375, "learning_rate": 4.9934066661787716e-05, "loss": 2.3457, "step": 1232 }, { "epoch": 0.16, "grad_norm": 0.283203125, "learning_rate": 4.9933912949247665e-05, "loss": 2.3193, "step": 1233 }, { "epoch": 0.16, "grad_norm": 0.3125, "learning_rate": 4.993375905797578e-05, "loss": 2.3415, "step": 1234 }, { "epoch": 0.16, "grad_norm": 0.28515625, "learning_rate": 4.9933604987973175e-05, "loss": 2.3549, "step": 1235 }, { "epoch": 0.16, "grad_norm": 0.30078125, "learning_rate": 4.993345073924094e-05, "loss": 2.3153, "step": 1236 }, { "epoch": 0.17, "grad_norm": 0.31640625, "learning_rate": 4.993329631178019e-05, "loss": 2.3564, "step": 1237 }, { "epoch": 0.17, "grad_norm": 0.326171875, "learning_rate": 4.9933141705592035e-05, "loss": 2.3043, "step": 1238 }, { "epoch": 0.17, "grad_norm": 0.30859375, "learning_rate": 4.993298692067757e-05, "loss": 2.3496, "step": 1239 }, { "epoch": 0.17, "grad_norm": 0.31640625, "learning_rate": 4.993283195703792e-05, "loss": 2.3195, "step": 1240 }, { "epoch": 0.17, "grad_norm": 0.294921875, "learning_rate": 4.993267681467417e-05, "loss": 2.3284, "step": 1241 }, { "epoch": 0.17, "grad_norm": 0.294921875, "learning_rate": 4.993252149358747e-05, "loss": 2.3138, "step": 1242 }, { "epoch": 0.17, "grad_norm": 0.2890625, "learning_rate": 4.9932365993778906e-05, "loss": 2.3486, "step": 1243 }, { "epoch": 0.17, "grad_norm": 0.302734375, "learning_rate": 4.993221031524961e-05, "loss": 2.3284, "step": 1244 }, { "epoch": 0.17, "grad_norm": 0.3125, "learning_rate": 4.993205445800067e-05, "loss": 2.3333, "step": 1245 }, { "epoch": 0.17, "grad_norm": 0.3203125, "learning_rate": 4.993189842203323e-05, "loss": 2.3302, "step": 1246 }, { "epoch": 0.17, "grad_norm": 0.3203125, "learning_rate": 4.9931742207348404e-05, "loss": 2.3155, "step": 1247 }, { "epoch": 0.17, "grad_norm": 0.318359375, "learning_rate": 4.99315858139473e-05, "loss": 2.3329, "step": 1248 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.993142924183105e-05, "loss": 2.3051, "step": 1249 }, { "epoch": 0.17, "grad_norm": 0.287109375, "learning_rate": 4.993127249100078e-05, "loss": 2.3182, "step": 1250 }, { "epoch": 0.17, "grad_norm": 0.3203125, "learning_rate": 4.99311155614576e-05, "loss": 2.3554, "step": 1251 }, { "epoch": 0.17, "grad_norm": 0.328125, "learning_rate": 4.993095845320264e-05, "loss": 2.3059, "step": 1252 }, { "epoch": 0.17, "grad_norm": 0.30859375, "learning_rate": 4.993080116623703e-05, "loss": 2.3492, "step": 1253 }, { "epoch": 0.17, "grad_norm": 0.30859375, "learning_rate": 4.99306437005619e-05, "loss": 2.2925, "step": 1254 }, { "epoch": 0.17, "grad_norm": 0.310546875, "learning_rate": 4.993048605617837e-05, "loss": 2.301, "step": 1255 }, { "epoch": 0.17, "grad_norm": 0.314453125, "learning_rate": 4.993032823308758e-05, "loss": 2.3397, "step": 1256 }, { "epoch": 0.17, "grad_norm": 0.310546875, "learning_rate": 4.9930170231290643e-05, "loss": 2.3469, "step": 1257 }, { "epoch": 0.17, "grad_norm": 0.306640625, "learning_rate": 4.993001205078871e-05, "loss": 2.3109, "step": 1258 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992985369158291e-05, "loss": 2.2969, "step": 1259 }, { "epoch": 0.17, "grad_norm": 0.29296875, "learning_rate": 4.992969515367438e-05, "loss": 2.3441, "step": 1260 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992953643706425e-05, "loss": 2.3323, "step": 1261 }, { "epoch": 0.17, "grad_norm": 0.31640625, "learning_rate": 4.992937754175366e-05, "loss": 2.3424, "step": 1262 }, { "epoch": 0.17, "grad_norm": 0.32421875, "learning_rate": 4.992921846774375e-05, "loss": 2.3081, "step": 1263 }, { "epoch": 0.17, "grad_norm": 0.298828125, "learning_rate": 4.992905921503566e-05, "loss": 2.3182, "step": 1264 }, { "epoch": 0.17, "grad_norm": 0.32421875, "learning_rate": 4.992889978363053e-05, "loss": 2.3359, "step": 1265 }, { "epoch": 0.17, "grad_norm": 0.326171875, "learning_rate": 4.992874017352952e-05, "loss": 2.3099, "step": 1266 }, { "epoch": 0.17, "grad_norm": 0.283203125, "learning_rate": 4.992858038473374e-05, "loss": 2.3866, "step": 1267 }, { "epoch": 0.17, "grad_norm": 0.34765625, "learning_rate": 4.992842041724437e-05, "loss": 2.3284, "step": 1268 }, { "epoch": 0.17, "grad_norm": 0.3203125, "learning_rate": 4.992826027106253e-05, "loss": 2.3514, "step": 1269 }, { "epoch": 0.17, "grad_norm": 0.3359375, "learning_rate": 4.992809994618938e-05, "loss": 2.3242, "step": 1270 }, { "epoch": 0.17, "grad_norm": 0.298828125, "learning_rate": 4.992793944262607e-05, "loss": 2.2983, "step": 1271 }, { "epoch": 0.17, "grad_norm": 0.298828125, "learning_rate": 4.992777876037375e-05, "loss": 2.3181, "step": 1272 }, { "epoch": 0.17, "grad_norm": 0.296875, "learning_rate": 4.992761789943356e-05, "loss": 2.3288, "step": 1273 }, { "epoch": 0.17, "grad_norm": 0.314453125, "learning_rate": 4.9927456859806684e-05, "loss": 2.3015, "step": 1274 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992729564149424e-05, "loss": 2.349, "step": 1275 }, { "epoch": 0.17, "grad_norm": 0.306640625, "learning_rate": 4.992713424449741e-05, "loss": 2.3293, "step": 1276 }, { "epoch": 0.17, "grad_norm": 0.29296875, "learning_rate": 4.992697266881733e-05, "loss": 2.3501, "step": 1277 }, { "epoch": 0.17, "grad_norm": 0.322265625, "learning_rate": 4.992681091445518e-05, "loss": 2.308, "step": 1278 }, { "epoch": 0.17, "grad_norm": 0.294921875, "learning_rate": 4.9926648981412104e-05, "loss": 2.3119, "step": 1279 }, { "epoch": 0.17, "grad_norm": 0.318359375, "learning_rate": 4.992648686968927e-05, "loss": 2.315, "step": 1280 }, { "epoch": 0.17, "grad_norm": 0.31640625, "learning_rate": 4.9926324579287834e-05, "loss": 2.3157, "step": 1281 }, { "epoch": 0.17, "grad_norm": 0.3125, "learning_rate": 4.992616211020896e-05, "loss": 2.3151, "step": 1282 }, { "epoch": 0.17, "grad_norm": 0.296875, "learning_rate": 4.992599946245381e-05, "loss": 2.3026, "step": 1283 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992583663602356e-05, "loss": 2.3118, "step": 1284 }, { "epoch": 0.17, "grad_norm": 0.32421875, "learning_rate": 4.992567363091938e-05, "loss": 2.3417, "step": 1285 }, { "epoch": 0.17, "grad_norm": 0.328125, "learning_rate": 4.992551044714243e-05, "loss": 2.3008, "step": 1286 }, { "epoch": 0.17, "grad_norm": 0.3125, "learning_rate": 4.992534708469387e-05, "loss": 2.3343, "step": 1287 }, { "epoch": 0.17, "grad_norm": 0.298828125, "learning_rate": 4.992518354357489e-05, "loss": 2.3425, "step": 1288 }, { "epoch": 0.17, "grad_norm": 0.294921875, "learning_rate": 4.992501982378664e-05, "loss": 2.2999, "step": 1289 }, { "epoch": 0.17, "grad_norm": 0.32421875, "learning_rate": 4.9924855925330326e-05, "loss": 2.3253, "step": 1290 }, { "epoch": 0.17, "grad_norm": 0.30859375, "learning_rate": 4.992469184820709e-05, "loss": 2.3402, "step": 1291 }, { "epoch": 0.17, "grad_norm": 0.30859375, "learning_rate": 4.9924527592418137e-05, "loss": 2.3356, "step": 1292 }, { "epoch": 0.17, "grad_norm": 0.29296875, "learning_rate": 4.9924363157964616e-05, "loss": 2.3242, "step": 1293 }, { "epoch": 0.17, "grad_norm": 0.306640625, "learning_rate": 4.992419854484772e-05, "loss": 2.3414, "step": 1294 }, { "epoch": 0.17, "grad_norm": 0.306640625, "learning_rate": 4.992403375306864e-05, "loss": 2.2932, "step": 1295 }, { "epoch": 0.17, "grad_norm": 0.31640625, "learning_rate": 4.992386878262854e-05, "loss": 2.3285, "step": 1296 }, { "epoch": 0.17, "grad_norm": 0.3046875, "learning_rate": 4.992370363352861e-05, "loss": 2.3396, "step": 1297 }, { "epoch": 0.17, "grad_norm": 0.33984375, "learning_rate": 4.9923538305770025e-05, "loss": 2.3268, "step": 1298 }, { "epoch": 0.17, "grad_norm": 0.314453125, "learning_rate": 4.992337279935397e-05, "loss": 2.3679, "step": 1299 }, { "epoch": 0.17, "grad_norm": 0.314453125, "learning_rate": 4.9923207114281654e-05, "loss": 2.3224, "step": 1300 }, { "epoch": 0.17, "grad_norm": 0.302734375, "learning_rate": 4.9923041250554245e-05, "loss": 2.2959, "step": 1301 }, { "epoch": 0.17, "grad_norm": 0.32421875, "learning_rate": 4.992287520817294e-05, "loss": 2.3535, "step": 1302 }, { "epoch": 0.17, "grad_norm": 0.3046875, "learning_rate": 4.9922708987138916e-05, "loss": 2.3134, "step": 1303 }, { "epoch": 0.17, "grad_norm": 0.3203125, "learning_rate": 4.992254258745338e-05, "loss": 2.2977, "step": 1304 }, { "epoch": 0.17, "grad_norm": 0.318359375, "learning_rate": 4.992237600911752e-05, "loss": 2.3379, "step": 1305 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992220925213252e-05, "loss": 2.3675, "step": 1306 }, { "epoch": 0.17, "grad_norm": 0.310546875, "learning_rate": 4.9922042316499596e-05, "loss": 2.2865, "step": 1307 }, { "epoch": 0.17, "grad_norm": 0.302734375, "learning_rate": 4.9921875202219926e-05, "loss": 2.3339, "step": 1308 }, { "epoch": 0.17, "grad_norm": 0.306640625, "learning_rate": 4.992170790929471e-05, "loss": 2.3202, "step": 1309 }, { "epoch": 0.17, "grad_norm": 0.291015625, "learning_rate": 4.992154043772516e-05, "loss": 2.3252, "step": 1310 }, { "epoch": 0.17, "grad_norm": 0.30078125, "learning_rate": 4.992137278751247e-05, "loss": 2.3294, "step": 1311 }, { "epoch": 0.18, "grad_norm": 0.2890625, "learning_rate": 4.992120495865783e-05, "loss": 2.3201, "step": 1312 }, { "epoch": 0.18, "grad_norm": 0.30859375, "learning_rate": 4.992103695116246e-05, "loss": 2.3132, "step": 1313 }, { "epoch": 0.18, "grad_norm": 0.32421875, "learning_rate": 4.992086876502756e-05, "loss": 2.3133, "step": 1314 }, { "epoch": 0.18, "grad_norm": 0.330078125, "learning_rate": 4.9920700400254324e-05, "loss": 2.3427, "step": 1315 }, { "epoch": 0.18, "grad_norm": 0.3125, "learning_rate": 4.9920531856843976e-05, "loss": 2.325, "step": 1316 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.992036313479771e-05, "loss": 2.3154, "step": 1317 }, { "epoch": 0.18, "grad_norm": 0.3125, "learning_rate": 4.992019423411675e-05, "loss": 2.3243, "step": 1318 }, { "epoch": 0.18, "grad_norm": 0.3125, "learning_rate": 4.992002515480228e-05, "loss": 2.3224, "step": 1319 }, { "epoch": 0.18, "grad_norm": 0.3203125, "learning_rate": 4.991985589685555e-05, "loss": 2.3499, "step": 1320 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.991968646027775e-05, "loss": 2.3254, "step": 1321 }, { "epoch": 0.18, "grad_norm": 0.31640625, "learning_rate": 4.991951684507009e-05, "loss": 2.3145, "step": 1322 }, { "epoch": 0.18, "grad_norm": 0.294921875, "learning_rate": 4.99193470512338e-05, "loss": 2.3378, "step": 1323 }, { "epoch": 0.18, "grad_norm": 0.302734375, "learning_rate": 4.9919177078770085e-05, "loss": 2.328, "step": 1324 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.991900692768018e-05, "loss": 2.3498, "step": 1325 }, { "epoch": 0.18, "grad_norm": 0.314453125, "learning_rate": 4.9918836597965287e-05, "loss": 2.332, "step": 1326 }, { "epoch": 0.18, "grad_norm": 0.29296875, "learning_rate": 4.991866608962663e-05, "loss": 2.35, "step": 1327 }, { "epoch": 0.18, "grad_norm": 0.310546875, "learning_rate": 4.991849540266544e-05, "loss": 2.3063, "step": 1328 }, { "epoch": 0.18, "grad_norm": 0.3359375, "learning_rate": 4.9918324537082936e-05, "loss": 2.3164, "step": 1329 }, { "epoch": 0.18, "grad_norm": 0.341796875, "learning_rate": 4.9918153492880345e-05, "loss": 2.3314, "step": 1330 }, { "epoch": 0.18, "grad_norm": 0.326171875, "learning_rate": 4.991798227005888e-05, "loss": 2.3186, "step": 1331 }, { "epoch": 0.18, "grad_norm": 0.30859375, "learning_rate": 4.99178108686198e-05, "loss": 2.3062, "step": 1332 }, { "epoch": 0.18, "grad_norm": 0.29296875, "learning_rate": 4.991763928856429e-05, "loss": 2.3571, "step": 1333 }, { "epoch": 0.18, "grad_norm": 0.30859375, "learning_rate": 4.9917467529893614e-05, "loss": 2.3052, "step": 1334 }, { "epoch": 0.18, "grad_norm": 0.296875, "learning_rate": 4.9917295592608994e-05, "loss": 2.3385, "step": 1335 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.9917123476711654e-05, "loss": 2.3405, "step": 1336 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.9916951182202834e-05, "loss": 2.3157, "step": 1337 }, { "epoch": 0.18, "grad_norm": 0.310546875, "learning_rate": 4.9916778709083775e-05, "loss": 2.2885, "step": 1338 }, { "epoch": 0.18, "grad_norm": 0.2890625, "learning_rate": 4.9916606057355696e-05, "loss": 2.3222, "step": 1339 }, { "epoch": 0.18, "grad_norm": 0.271484375, "learning_rate": 4.991643322701986e-05, "loss": 2.3183, "step": 1340 }, { "epoch": 0.18, "grad_norm": 0.3125, "learning_rate": 4.991626021807748e-05, "loss": 2.3338, "step": 1341 }, { "epoch": 0.18, "grad_norm": 0.291015625, "learning_rate": 4.991608703052982e-05, "loss": 2.3397, "step": 1342 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.99159136643781e-05, "loss": 2.3163, "step": 1343 }, { "epoch": 0.18, "grad_norm": 0.294921875, "learning_rate": 4.991574011962358e-05, "loss": 2.313, "step": 1344 }, { "epoch": 0.18, "grad_norm": 0.298828125, "learning_rate": 4.9915566396267485e-05, "loss": 2.3203, "step": 1345 }, { "epoch": 0.18, "grad_norm": 0.298828125, "learning_rate": 4.991539249431108e-05, "loss": 2.2816, "step": 1346 }, { "epoch": 0.18, "grad_norm": 0.330078125, "learning_rate": 4.991521841375561e-05, "loss": 2.3124, "step": 1347 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.9915044154602296e-05, "loss": 2.3291, "step": 1348 }, { "epoch": 0.18, "grad_norm": 0.314453125, "learning_rate": 4.991486971685242e-05, "loss": 2.3649, "step": 1349 }, { "epoch": 0.18, "grad_norm": 0.318359375, "learning_rate": 4.991469510050722e-05, "loss": 2.33, "step": 1350 }, { "epoch": 0.18, "grad_norm": 0.30078125, "learning_rate": 4.991452030556795e-05, "loss": 2.3209, "step": 1351 }, { "epoch": 0.18, "grad_norm": 0.30859375, "learning_rate": 4.991434533203585e-05, "loss": 2.3267, "step": 1352 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.991417017991219e-05, "loss": 2.3222, "step": 1353 }, { "epoch": 0.18, "grad_norm": 0.2890625, "learning_rate": 4.991399484919822e-05, "loss": 2.3293, "step": 1354 }, { "epoch": 0.18, "grad_norm": 0.330078125, "learning_rate": 4.9913819339895194e-05, "loss": 2.3374, "step": 1355 }, { "epoch": 0.18, "grad_norm": 0.318359375, "learning_rate": 4.991364365200438e-05, "loss": 2.3386, "step": 1356 }, { "epoch": 0.18, "grad_norm": 0.318359375, "learning_rate": 4.9913467785527024e-05, "loss": 2.3148, "step": 1357 }, { "epoch": 0.18, "grad_norm": 0.3203125, "learning_rate": 4.9913291740464396e-05, "loss": 2.3139, "step": 1358 }, { "epoch": 0.18, "grad_norm": 0.31640625, "learning_rate": 4.991311551681775e-05, "loss": 2.3472, "step": 1359 }, { "epoch": 0.18, "grad_norm": 0.310546875, "learning_rate": 4.991293911458836e-05, "loss": 2.3377, "step": 1360 }, { "epoch": 0.18, "grad_norm": 0.326171875, "learning_rate": 4.991276253377748e-05, "loss": 2.3067, "step": 1361 }, { "epoch": 0.18, "grad_norm": 0.314453125, "learning_rate": 4.991258577438638e-05, "loss": 2.2995, "step": 1362 }, { "epoch": 0.18, "grad_norm": 0.310546875, "learning_rate": 4.991240883641633e-05, "loss": 2.3124, "step": 1363 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.991223171986859e-05, "loss": 2.3033, "step": 1364 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.991205442474445e-05, "loss": 2.3281, "step": 1365 }, { "epoch": 0.18, "grad_norm": 0.30078125, "learning_rate": 4.991187695104515e-05, "loss": 2.2938, "step": 1366 }, { "epoch": 0.18, "grad_norm": 0.302734375, "learning_rate": 4.991169929877198e-05, "loss": 2.3576, "step": 1367 }, { "epoch": 0.18, "grad_norm": 0.32421875, "learning_rate": 4.9911521467926224e-05, "loss": 2.2856, "step": 1368 }, { "epoch": 0.18, "grad_norm": 0.32421875, "learning_rate": 4.9911343458509135e-05, "loss": 2.3195, "step": 1369 }, { "epoch": 0.18, "grad_norm": 0.322265625, "learning_rate": 4.9911165270521996e-05, "loss": 2.3365, "step": 1370 }, { "epoch": 0.18, "grad_norm": 0.3046875, "learning_rate": 4.9910986903966096e-05, "loss": 2.2948, "step": 1371 }, { "epoch": 0.18, "grad_norm": 0.287109375, "learning_rate": 4.9910808358842694e-05, "loss": 2.3298, "step": 1372 }, { "epoch": 0.18, "grad_norm": 0.294921875, "learning_rate": 4.991062963515309e-05, "loss": 2.3073, "step": 1373 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.991045073289855e-05, "loss": 2.3014, "step": 1374 }, { "epoch": 0.18, "grad_norm": 0.326171875, "learning_rate": 4.991027165208036e-05, "loss": 2.3028, "step": 1375 }, { "epoch": 0.18, "grad_norm": 0.306640625, "learning_rate": 4.9910092392699816e-05, "loss": 2.3214, "step": 1376 }, { "epoch": 0.18, "grad_norm": 0.302734375, "learning_rate": 4.990991295475818e-05, "loss": 2.3046, "step": 1377 }, { "epoch": 0.18, "grad_norm": 0.30859375, "learning_rate": 4.990973333825676e-05, "loss": 2.325, "step": 1378 }, { "epoch": 0.18, "grad_norm": 0.29296875, "learning_rate": 4.990955354319683e-05, "loss": 2.295, "step": 1379 }, { "epoch": 0.18, "grad_norm": 0.294921875, "learning_rate": 4.9909373569579675e-05, "loss": 2.3339, "step": 1380 }, { "epoch": 0.18, "grad_norm": 0.310546875, "learning_rate": 4.99091934174066e-05, "loss": 2.3342, "step": 1381 }, { "epoch": 0.18, "grad_norm": 0.3125, "learning_rate": 4.990901308667889e-05, "loss": 2.2926, "step": 1382 }, { "epoch": 0.18, "grad_norm": 0.302734375, "learning_rate": 4.9908832577397843e-05, "loss": 2.3141, "step": 1383 }, { "epoch": 0.18, "grad_norm": 0.298828125, "learning_rate": 4.990865188956474e-05, "loss": 2.3324, "step": 1384 }, { "epoch": 0.18, "grad_norm": 0.28515625, "learning_rate": 4.990847102318088e-05, "loss": 2.3075, "step": 1385 }, { "epoch": 0.18, "grad_norm": 0.298828125, "learning_rate": 4.990828997824757e-05, "loss": 2.2834, "step": 1386 }, { "epoch": 0.19, "grad_norm": 0.296875, "learning_rate": 4.9908108754766095e-05, "loss": 2.3348, "step": 1387 }, { "epoch": 0.19, "grad_norm": 0.314453125, "learning_rate": 4.990792735273777e-05, "loss": 2.3197, "step": 1388 }, { "epoch": 0.19, "grad_norm": 0.29296875, "learning_rate": 4.9907745772163874e-05, "loss": 2.3264, "step": 1389 }, { "epoch": 0.19, "grad_norm": 0.30859375, "learning_rate": 4.990756401304572e-05, "loss": 2.3352, "step": 1390 }, { "epoch": 0.19, "grad_norm": 0.30078125, "learning_rate": 4.990738207538461e-05, "loss": 2.3394, "step": 1391 }, { "epoch": 0.19, "grad_norm": 0.318359375, "learning_rate": 4.990719995918186e-05, "loss": 2.309, "step": 1392 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.990701766443875e-05, "loss": 2.3035, "step": 1393 }, { "epoch": 0.19, "grad_norm": 0.30078125, "learning_rate": 4.990683519115661e-05, "loss": 2.3289, "step": 1394 }, { "epoch": 0.19, "grad_norm": 0.310546875, "learning_rate": 4.990665253933673e-05, "loss": 2.3068, "step": 1395 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.9906469708980444e-05, "loss": 2.3247, "step": 1396 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.990628670008903e-05, "loss": 2.3244, "step": 1397 }, { "epoch": 0.19, "grad_norm": 0.283203125, "learning_rate": 4.990610351266382e-05, "loss": 2.2859, "step": 1398 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.9905920146706134e-05, "loss": 2.3302, "step": 1399 }, { "epoch": 0.19, "grad_norm": 0.296875, "learning_rate": 4.990573660221726e-05, "loss": 2.3099, "step": 1400 }, { "epoch": 0.19, "grad_norm": 0.296875, "learning_rate": 4.9905552879198536e-05, "loss": 2.3131, "step": 1401 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.990536897765128e-05, "loss": 2.3214, "step": 1402 }, { "epoch": 0.19, "grad_norm": 0.287109375, "learning_rate": 4.990518489757679e-05, "loss": 2.3211, "step": 1403 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.990500063897641e-05, "loss": 2.3156, "step": 1404 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.990481620185145e-05, "loss": 2.3115, "step": 1405 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.990463158620322e-05, "loss": 2.3254, "step": 1406 }, { "epoch": 0.19, "grad_norm": 0.310546875, "learning_rate": 4.990444679203307e-05, "loss": 2.3019, "step": 1407 }, { "epoch": 0.19, "grad_norm": 0.35546875, "learning_rate": 4.990426181934229e-05, "loss": 2.3141, "step": 1408 }, { "epoch": 0.19, "grad_norm": 0.3203125, "learning_rate": 4.990407666813224e-05, "loss": 2.3226, "step": 1409 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.990389133840423e-05, "loss": 2.2959, "step": 1410 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.9903705830159585e-05, "loss": 2.3261, "step": 1411 }, { "epoch": 0.19, "grad_norm": 0.30859375, "learning_rate": 4.9903520143399644e-05, "loss": 2.3193, "step": 1412 }, { "epoch": 0.19, "grad_norm": 0.28515625, "learning_rate": 4.9903334278125735e-05, "loss": 2.3219, "step": 1413 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.990314823433919e-05, "loss": 2.3299, "step": 1414 }, { "epoch": 0.19, "grad_norm": 0.32421875, "learning_rate": 4.990296201204134e-05, "loss": 2.2927, "step": 1415 }, { "epoch": 0.19, "grad_norm": 0.3046875, "learning_rate": 4.9902775611233535e-05, "loss": 2.3231, "step": 1416 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.990258903191709e-05, "loss": 2.2897, "step": 1417 }, { "epoch": 0.19, "grad_norm": 0.30078125, "learning_rate": 4.9902402274093354e-05, "loss": 2.3071, "step": 1418 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.990221533776366e-05, "loss": 2.3077, "step": 1419 }, { "epoch": 0.19, "grad_norm": 0.30078125, "learning_rate": 4.990202822292934e-05, "loss": 2.3458, "step": 1420 }, { "epoch": 0.19, "grad_norm": 0.310546875, "learning_rate": 4.990184092959176e-05, "loss": 2.3092, "step": 1421 }, { "epoch": 0.19, "grad_norm": 0.3125, "learning_rate": 4.990165345775225e-05, "loss": 2.3037, "step": 1422 }, { "epoch": 0.19, "grad_norm": 0.3125, "learning_rate": 4.990146580741215e-05, "loss": 2.3102, "step": 1423 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.99012779785728e-05, "loss": 2.3336, "step": 1424 }, { "epoch": 0.19, "grad_norm": 0.3046875, "learning_rate": 4.990108997123556e-05, "loss": 2.3105, "step": 1425 }, { "epoch": 0.19, "grad_norm": 0.310546875, "learning_rate": 4.990090178540178e-05, "loss": 2.3431, "step": 1426 }, { "epoch": 0.19, "grad_norm": 0.32421875, "learning_rate": 4.990071342107279e-05, "loss": 2.3053, "step": 1427 }, { "epoch": 0.19, "grad_norm": 0.296875, "learning_rate": 4.990052487824995e-05, "loss": 2.3252, "step": 1428 }, { "epoch": 0.19, "grad_norm": 0.3046875, "learning_rate": 4.990033615693461e-05, "loss": 2.3726, "step": 1429 }, { "epoch": 0.19, "grad_norm": 0.318359375, "learning_rate": 4.9900147257128136e-05, "loss": 2.336, "step": 1430 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.989995817883187e-05, "loss": 2.3381, "step": 1431 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.9899768922047166e-05, "loss": 2.3503, "step": 1432 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.989957948677538e-05, "loss": 2.3079, "step": 1433 }, { "epoch": 0.19, "grad_norm": 0.30859375, "learning_rate": 4.9899389873017874e-05, "loss": 2.288, "step": 1434 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.989920008077601e-05, "loss": 2.3303, "step": 1435 }, { "epoch": 0.19, "grad_norm": 0.3125, "learning_rate": 4.989901011005114e-05, "loss": 2.3587, "step": 1436 }, { "epoch": 0.19, "grad_norm": 0.2890625, "learning_rate": 4.9898819960844634e-05, "loss": 2.3334, "step": 1437 }, { "epoch": 0.19, "grad_norm": 0.298828125, "learning_rate": 4.9898629633157856e-05, "loss": 2.3202, "step": 1438 }, { "epoch": 0.19, "grad_norm": 0.3046875, "learning_rate": 4.9898439126992156e-05, "loss": 2.3305, "step": 1439 }, { "epoch": 0.19, "grad_norm": 0.283203125, "learning_rate": 4.989824844234892e-05, "loss": 2.2926, "step": 1440 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.989805757922949e-05, "loss": 2.2926, "step": 1441 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.989786653763527e-05, "loss": 2.3315, "step": 1442 }, { "epoch": 0.19, "grad_norm": 0.30078125, "learning_rate": 4.9897675317567594e-05, "loss": 2.3289, "step": 1443 }, { "epoch": 0.19, "grad_norm": 0.318359375, "learning_rate": 4.989748391902785e-05, "loss": 2.302, "step": 1444 }, { "epoch": 0.19, "grad_norm": 0.3125, "learning_rate": 4.989729234201741e-05, "loss": 2.3142, "step": 1445 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.9897100586537634e-05, "loss": 2.3085, "step": 1446 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.9896908652589916e-05, "loss": 2.2957, "step": 1447 }, { "epoch": 0.19, "grad_norm": 0.322265625, "learning_rate": 4.9896716540175614e-05, "loss": 2.3178, "step": 1448 }, { "epoch": 0.19, "grad_norm": 0.30859375, "learning_rate": 4.989652424929612e-05, "loss": 2.3029, "step": 1449 }, { "epoch": 0.19, "grad_norm": 0.28515625, "learning_rate": 4.989633177995281e-05, "loss": 2.3356, "step": 1450 }, { "epoch": 0.19, "grad_norm": 0.296875, "learning_rate": 4.989613913214705e-05, "loss": 2.3428, "step": 1451 }, { "epoch": 0.19, "grad_norm": 0.302734375, "learning_rate": 4.989594630588024e-05, "loss": 2.3268, "step": 1452 }, { "epoch": 0.19, "grad_norm": 0.306640625, "learning_rate": 4.9895753301153744e-05, "loss": 2.311, "step": 1453 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.9895560117968956e-05, "loss": 2.315, "step": 1454 }, { "epoch": 0.19, "grad_norm": 0.326171875, "learning_rate": 4.989536675632726e-05, "loss": 2.3194, "step": 1455 }, { "epoch": 0.19, "grad_norm": 0.294921875, "learning_rate": 4.989517321623004e-05, "loss": 2.3123, "step": 1456 }, { "epoch": 0.19, "grad_norm": 0.30859375, "learning_rate": 4.989497949767869e-05, "loss": 2.3286, "step": 1457 }, { "epoch": 0.19, "grad_norm": 0.314453125, "learning_rate": 4.989478560067459e-05, "loss": 2.289, "step": 1458 }, { "epoch": 0.19, "grad_norm": 0.328125, "learning_rate": 4.9894591525219134e-05, "loss": 2.3554, "step": 1459 }, { "epoch": 0.19, "grad_norm": 0.314453125, "learning_rate": 4.9894397271313706e-05, "loss": 2.3225, "step": 1460 }, { "epoch": 0.19, "grad_norm": 0.298828125, "learning_rate": 4.9894202838959706e-05, "loss": 2.2989, "step": 1461 }, { "epoch": 0.2, "grad_norm": 0.314453125, "learning_rate": 4.989400822815853e-05, "loss": 2.3261, "step": 1462 }, { "epoch": 0.2, "grad_norm": 0.29296875, "learning_rate": 4.989381343891157e-05, "loss": 2.3074, "step": 1463 }, { "epoch": 0.2, "grad_norm": 0.314453125, "learning_rate": 4.989361847122021e-05, "loss": 2.3079, "step": 1464 }, { "epoch": 0.2, "grad_norm": 0.296875, "learning_rate": 4.9893423325085874e-05, "loss": 2.3305, "step": 1465 }, { "epoch": 0.2, "grad_norm": 0.294921875, "learning_rate": 4.989322800050994e-05, "loss": 2.3134, "step": 1466 }, { "epoch": 0.2, "grad_norm": 0.34375, "learning_rate": 4.9893032497493816e-05, "loss": 2.2942, "step": 1467 }, { "epoch": 0.2, "grad_norm": 0.296875, "learning_rate": 4.98928368160389e-05, "loss": 2.3177, "step": 1468 }, { "epoch": 0.2, "grad_norm": 0.296875, "learning_rate": 4.98926409561466e-05, "loss": 2.3137, "step": 1469 }, { "epoch": 0.2, "grad_norm": 0.294921875, "learning_rate": 4.9892444917818316e-05, "loss": 2.3339, "step": 1470 }, { "epoch": 0.2, "grad_norm": 0.314453125, "learning_rate": 4.9892248701055446e-05, "loss": 2.303, "step": 1471 }, { "epoch": 0.2, "grad_norm": 0.302734375, "learning_rate": 4.989205230585941e-05, "loss": 2.2997, "step": 1472 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 4.989185573223161e-05, "loss": 2.3211, "step": 1473 }, { "epoch": 0.2, "grad_norm": 0.30859375, "learning_rate": 4.989165898017345e-05, "loss": 2.3088, "step": 1474 }, { "epoch": 0.2, "grad_norm": 0.30859375, "learning_rate": 4.989146204968635e-05, "loss": 2.3326, "step": 1475 }, { "epoch": 0.2, "grad_norm": 0.291015625, "learning_rate": 4.9891264940771715e-05, "loss": 2.2969, "step": 1476 }, { "epoch": 0.2, "grad_norm": 0.310546875, "learning_rate": 4.9891067653430954e-05, "loss": 2.3121, "step": 1477 }, { "epoch": 0.2, "grad_norm": 0.32421875, "learning_rate": 4.9890870187665494e-05, "loss": 2.3592, "step": 1478 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.9890672543476746e-05, "loss": 2.341, "step": 1479 }, { "epoch": 0.2, "grad_norm": 0.32421875, "learning_rate": 4.989047472086612e-05, "loss": 2.2968, "step": 1480 }, { "epoch": 0.2, "grad_norm": 0.279296875, "learning_rate": 4.989027671983504e-05, "loss": 2.313, "step": 1481 }, { "epoch": 0.2, "grad_norm": 0.310546875, "learning_rate": 4.989007854038492e-05, "loss": 2.3087, "step": 1482 }, { "epoch": 0.2, "grad_norm": 0.306640625, "learning_rate": 4.988988018251719e-05, "loss": 2.3264, "step": 1483 }, { "epoch": 0.2, "grad_norm": 0.29296875, "learning_rate": 4.988968164623326e-05, "loss": 2.3022, "step": 1484 }, { "epoch": 0.2, "grad_norm": 0.328125, "learning_rate": 4.988948293153456e-05, "loss": 2.329, "step": 1485 }, { "epoch": 0.2, "grad_norm": 0.28515625, "learning_rate": 4.988928403842252e-05, "loss": 2.3005, "step": 1486 }, { "epoch": 0.2, "grad_norm": 0.2890625, "learning_rate": 4.988908496689855e-05, "loss": 2.2981, "step": 1487 }, { "epoch": 0.2, "grad_norm": 0.3203125, "learning_rate": 4.9888885716964094e-05, "loss": 2.3038, "step": 1488 }, { "epoch": 0.2, "grad_norm": 0.306640625, "learning_rate": 4.9888686288620565e-05, "loss": 2.3023, "step": 1489 }, { "epoch": 0.2, "grad_norm": 0.322265625, "learning_rate": 4.988848668186941e-05, "loss": 2.2915, "step": 1490 }, { "epoch": 0.2, "grad_norm": 0.3203125, "learning_rate": 4.988828689671204e-05, "loss": 2.3294, "step": 1491 }, { "epoch": 0.2, "grad_norm": 0.318359375, "learning_rate": 4.98880869331499e-05, "loss": 2.3331, "step": 1492 }, { "epoch": 0.2, "grad_norm": 0.28515625, "learning_rate": 4.988788679118442e-05, "loss": 2.3344, "step": 1493 }, { "epoch": 0.2, "grad_norm": 0.322265625, "learning_rate": 4.988768647081704e-05, "loss": 2.3352, "step": 1494 }, { "epoch": 0.2, "grad_norm": 0.31640625, "learning_rate": 4.9887485972049195e-05, "loss": 2.317, "step": 1495 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 4.9887285294882315e-05, "loss": 2.2972, "step": 1496 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 4.9887084439317834e-05, "loss": 2.3193, "step": 1497 }, { "epoch": 0.2, "grad_norm": 0.29296875, "learning_rate": 4.98868834053572e-05, "loss": 2.3015, "step": 1498 }, { "epoch": 0.2, "grad_norm": 0.310546875, "learning_rate": 4.988668219300186e-05, "loss": 2.3257, "step": 1499 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 4.9886480802253246e-05, "loss": 2.3029, "step": 1500 }, { "epoch": 0.2, "grad_norm": 0.33984375, "learning_rate": 4.988627923311281e-05, "loss": 2.3055, "step": 1501 }, { "epoch": 0.2, "grad_norm": 0.322265625, "learning_rate": 4.9886077485581987e-05, "loss": 2.3289, "step": 1502 }, { "epoch": 0.2, "grad_norm": 0.314453125, "learning_rate": 4.988587555966223e-05, "loss": 2.3123, "step": 1503 }, { "epoch": 0.2, "grad_norm": 0.322265625, "learning_rate": 4.988567345535499e-05, "loss": 2.3315, "step": 1504 }, { "epoch": 0.2, "grad_norm": 0.333984375, "learning_rate": 4.98854711726617e-05, "loss": 2.3285, "step": 1505 }, { "epoch": 0.2, "grad_norm": 0.2890625, "learning_rate": 4.9885268711583835e-05, "loss": 2.3351, "step": 1506 }, { "epoch": 0.2, "grad_norm": 0.306640625, "learning_rate": 4.9885066072122824e-05, "loss": 2.296, "step": 1507 }, { "epoch": 0.2, "grad_norm": 0.318359375, "learning_rate": 4.9884863254280124e-05, "loss": 2.3086, "step": 1508 }, { "epoch": 0.2, "grad_norm": 0.3125, "learning_rate": 4.9884660258057204e-05, "loss": 2.3426, "step": 1509 }, { "epoch": 0.2, "grad_norm": 0.287109375, "learning_rate": 4.9884457083455495e-05, "loss": 2.3243, "step": 1510 }, { "epoch": 0.2, "grad_norm": 0.302734375, "learning_rate": 4.9884253730476475e-05, "loss": 2.3028, "step": 1511 }, { "epoch": 0.2, "grad_norm": 0.28515625, "learning_rate": 4.988405019912159e-05, "loss": 2.3335, "step": 1512 }, { "epoch": 0.2, "grad_norm": 0.3046875, "learning_rate": 4.98838464893923e-05, "loss": 2.3123, "step": 1513 }, { "epoch": 0.2, "grad_norm": 0.33203125, "learning_rate": 4.988364260129007e-05, "loss": 2.3056, "step": 1514 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.9883438534816354e-05, "loss": 2.3066, "step": 1515 }, { "epoch": 0.2, "grad_norm": 0.310546875, "learning_rate": 4.988323428997262e-05, "loss": 2.3226, "step": 1516 }, { "epoch": 0.2, "grad_norm": 0.333984375, "learning_rate": 4.9883029866760334e-05, "loss": 2.3419, "step": 1517 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.9882825265180955e-05, "loss": 2.3172, "step": 1518 }, { "epoch": 0.2, "grad_norm": 0.279296875, "learning_rate": 4.988262048523595e-05, "loss": 2.3304, "step": 1519 }, { "epoch": 0.2, "grad_norm": 0.32421875, "learning_rate": 4.98824155269268e-05, "loss": 2.309, "step": 1520 }, { "epoch": 0.2, "grad_norm": 0.31640625, "learning_rate": 4.988221039025496e-05, "loss": 2.3095, "step": 1521 }, { "epoch": 0.2, "grad_norm": 0.294921875, "learning_rate": 4.988200507522191e-05, "loss": 2.312, "step": 1522 }, { "epoch": 0.2, "grad_norm": 0.31640625, "learning_rate": 4.988179958182911e-05, "loss": 2.3032, "step": 1523 }, { "epoch": 0.2, "grad_norm": 0.294921875, "learning_rate": 4.988159391007804e-05, "loss": 2.3271, "step": 1524 }, { "epoch": 0.2, "grad_norm": 0.302734375, "learning_rate": 4.988138805997018e-05, "loss": 2.3178, "step": 1525 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.988118203150699e-05, "loss": 2.3173, "step": 1526 }, { "epoch": 0.2, "grad_norm": 0.30859375, "learning_rate": 4.988097582468996e-05, "loss": 2.3205, "step": 1527 }, { "epoch": 0.2, "grad_norm": 0.298828125, "learning_rate": 4.9880769439520565e-05, "loss": 2.3137, "step": 1528 }, { "epoch": 0.2, "grad_norm": 0.291015625, "learning_rate": 4.988056287600028e-05, "loss": 2.2933, "step": 1529 }, { "epoch": 0.2, "grad_norm": 0.28515625, "learning_rate": 4.98803561341306e-05, "loss": 2.3213, "step": 1530 }, { "epoch": 0.2, "grad_norm": 0.333984375, "learning_rate": 4.9880149213912985e-05, "loss": 2.3183, "step": 1531 }, { "epoch": 0.2, "grad_norm": 0.291015625, "learning_rate": 4.987994211534894e-05, "loss": 2.2852, "step": 1532 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.987973483843993e-05, "loss": 2.312, "step": 1533 }, { "epoch": 0.2, "grad_norm": 0.28515625, "learning_rate": 4.987952738318745e-05, "loss": 2.3122, "step": 1534 }, { "epoch": 0.2, "grad_norm": 0.298828125, "learning_rate": 4.9879319749592995e-05, "loss": 2.3337, "step": 1535 }, { "epoch": 0.2, "grad_norm": 0.30078125, "learning_rate": 4.987911193765804e-05, "loss": 2.2903, "step": 1536 }, { "epoch": 0.21, "grad_norm": 0.28125, "learning_rate": 4.987890394738409e-05, "loss": 2.3039, "step": 1537 }, { "epoch": 0.21, "grad_norm": 0.306640625, "learning_rate": 4.987869577877261e-05, "loss": 2.2836, "step": 1538 }, { "epoch": 0.21, "grad_norm": 0.318359375, "learning_rate": 4.987848743182512e-05, "loss": 2.324, "step": 1539 }, { "epoch": 0.21, "grad_norm": 0.30859375, "learning_rate": 4.9878278906543097e-05, "loss": 2.2848, "step": 1540 }, { "epoch": 0.21, "grad_norm": 0.296875, "learning_rate": 4.9878070202928044e-05, "loss": 2.3127, "step": 1541 }, { "epoch": 0.21, "grad_norm": 0.294921875, "learning_rate": 4.9877861320981454e-05, "loss": 2.3051, "step": 1542 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.987765226070482e-05, "loss": 2.34, "step": 1543 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.987744302209965e-05, "loss": 2.3388, "step": 1544 }, { "epoch": 0.21, "grad_norm": 0.28125, "learning_rate": 4.987723360516743e-05, "loss": 2.3288, "step": 1545 }, { "epoch": 0.21, "grad_norm": 0.28125, "learning_rate": 4.987702400990968e-05, "loss": 2.2869, "step": 1546 }, { "epoch": 0.21, "grad_norm": 0.3125, "learning_rate": 4.9876814236327886e-05, "loss": 2.2977, "step": 1547 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.9876604284423554e-05, "loss": 2.3244, "step": 1548 }, { "epoch": 0.21, "grad_norm": 0.326171875, "learning_rate": 4.9876394154198193e-05, "loss": 2.3271, "step": 1549 }, { "epoch": 0.21, "grad_norm": 0.2734375, "learning_rate": 4.9876183845653316e-05, "loss": 2.3017, "step": 1550 }, { "epoch": 0.21, "grad_norm": 0.296875, "learning_rate": 4.987597335879042e-05, "loss": 2.3351, "step": 1551 }, { "epoch": 0.21, "grad_norm": 0.28515625, "learning_rate": 4.987576269361102e-05, "loss": 2.3184, "step": 1552 }, { "epoch": 0.21, "grad_norm": 0.27734375, "learning_rate": 4.9875551850116615e-05, "loss": 2.3144, "step": 1553 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.9875340828308724e-05, "loss": 2.3101, "step": 1554 }, { "epoch": 0.21, "grad_norm": 0.3203125, "learning_rate": 4.987512962818887e-05, "loss": 2.3264, "step": 1555 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.987491824975855e-05, "loss": 2.3167, "step": 1556 }, { "epoch": 0.21, "grad_norm": 0.3046875, "learning_rate": 4.987470669301929e-05, "loss": 2.336, "step": 1557 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.987449495797261e-05, "loss": 2.3154, "step": 1558 }, { "epoch": 0.21, "grad_norm": 0.30078125, "learning_rate": 4.9874283044620006e-05, "loss": 2.3337, "step": 1559 }, { "epoch": 0.21, "grad_norm": 0.291015625, "learning_rate": 4.9874070952963025e-05, "loss": 2.3218, "step": 1560 }, { "epoch": 0.21, "grad_norm": 0.3203125, "learning_rate": 4.987385868300317e-05, "loss": 2.3095, "step": 1561 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.987364623474196e-05, "loss": 2.3072, "step": 1562 }, { "epoch": 0.21, "grad_norm": 0.29296875, "learning_rate": 4.987343360818093e-05, "loss": 2.3075, "step": 1563 }, { "epoch": 0.21, "grad_norm": 0.294921875, "learning_rate": 4.98732208033216e-05, "loss": 2.3052, "step": 1564 }, { "epoch": 0.21, "grad_norm": 0.3125, "learning_rate": 4.98730078201655e-05, "loss": 2.3145, "step": 1565 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.9872794658714136e-05, "loss": 2.3476, "step": 1566 }, { "epoch": 0.21, "grad_norm": 0.3125, "learning_rate": 4.9872581318969056e-05, "loss": 2.29, "step": 1567 }, { "epoch": 0.21, "grad_norm": 0.31640625, "learning_rate": 4.9872367800931794e-05, "loss": 2.3175, "step": 1568 }, { "epoch": 0.21, "grad_norm": 0.30859375, "learning_rate": 4.987215410460386e-05, "loss": 2.3172, "step": 1569 }, { "epoch": 0.21, "grad_norm": 0.330078125, "learning_rate": 4.98719402299868e-05, "loss": 2.3145, "step": 1570 }, { "epoch": 0.21, "grad_norm": 0.30859375, "learning_rate": 4.987172617708215e-05, "loss": 2.2967, "step": 1571 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.9871511945891425e-05, "loss": 2.2966, "step": 1572 }, { "epoch": 0.21, "grad_norm": 0.29296875, "learning_rate": 4.9871297536416184e-05, "loss": 2.3068, "step": 1573 }, { "epoch": 0.21, "grad_norm": 0.2890625, "learning_rate": 4.987108294865794e-05, "loss": 2.3179, "step": 1574 }, { "epoch": 0.21, "grad_norm": 0.306640625, "learning_rate": 4.987086818261826e-05, "loss": 2.3572, "step": 1575 }, { "epoch": 0.21, "grad_norm": 0.314453125, "learning_rate": 4.987065323829866e-05, "loss": 2.3288, "step": 1576 }, { "epoch": 0.21, "grad_norm": 0.306640625, "learning_rate": 4.987043811570069e-05, "loss": 2.3293, "step": 1577 }, { "epoch": 0.21, "grad_norm": 0.294921875, "learning_rate": 4.98702228148259e-05, "loss": 2.3205, "step": 1578 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.987000733567582e-05, "loss": 2.3183, "step": 1579 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.986979167825199e-05, "loss": 2.308, "step": 1580 }, { "epoch": 0.21, "grad_norm": 0.283203125, "learning_rate": 4.986957584255597e-05, "loss": 2.3292, "step": 1581 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.98693598285893e-05, "loss": 2.323, "step": 1582 }, { "epoch": 0.21, "grad_norm": 0.3125, "learning_rate": 4.9869143636353537e-05, "loss": 2.346, "step": 1583 }, { "epoch": 0.21, "grad_norm": 0.30078125, "learning_rate": 4.986892726585022e-05, "loss": 2.3075, "step": 1584 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.986871071708091e-05, "loss": 2.3374, "step": 1585 }, { "epoch": 0.21, "grad_norm": 0.306640625, "learning_rate": 4.986849399004715e-05, "loss": 2.314, "step": 1586 }, { "epoch": 0.21, "grad_norm": 0.3046875, "learning_rate": 4.9868277084750493e-05, "loss": 2.3059, "step": 1587 }, { "epoch": 0.21, "grad_norm": 0.291015625, "learning_rate": 4.9868060001192495e-05, "loss": 2.3086, "step": 1588 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.9867842739374726e-05, "loss": 2.3005, "step": 1589 }, { "epoch": 0.21, "grad_norm": 0.30078125, "learning_rate": 4.9867625299298724e-05, "loss": 2.3248, "step": 1590 }, { "epoch": 0.21, "grad_norm": 0.2890625, "learning_rate": 4.986740768096606e-05, "loss": 2.3213, "step": 1591 }, { "epoch": 0.21, "grad_norm": 0.30078125, "learning_rate": 4.986718988437829e-05, "loss": 2.3146, "step": 1592 }, { "epoch": 0.21, "grad_norm": 0.27734375, "learning_rate": 4.9866971909536975e-05, "loss": 2.3229, "step": 1593 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.9866753756443675e-05, "loss": 2.2802, "step": 1594 }, { "epoch": 0.21, "grad_norm": 0.287109375, "learning_rate": 4.986653542509996e-05, "loss": 2.3324, "step": 1595 }, { "epoch": 0.21, "grad_norm": 0.314453125, "learning_rate": 4.9866316915507387e-05, "loss": 2.2983, "step": 1596 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.986609822766753e-05, "loss": 2.2936, "step": 1597 }, { "epoch": 0.21, "grad_norm": 0.291015625, "learning_rate": 4.986587936158195e-05, "loss": 2.2952, "step": 1598 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.986566031725222e-05, "loss": 2.3053, "step": 1599 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.986544109467992e-05, "loss": 2.3162, "step": 1600 }, { "epoch": 0.21, "grad_norm": 0.3125, "learning_rate": 4.986522169386659e-05, "loss": 2.3328, "step": 1601 }, { "epoch": 0.21, "grad_norm": 0.310546875, "learning_rate": 4.986500211481384e-05, "loss": 2.3145, "step": 1602 }, { "epoch": 0.21, "grad_norm": 0.322265625, "learning_rate": 4.9864782357523223e-05, "loss": 2.3021, "step": 1603 }, { "epoch": 0.21, "grad_norm": 0.3046875, "learning_rate": 4.986456242199632e-05, "loss": 2.3201, "step": 1604 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.9864342308234705e-05, "loss": 2.3315, "step": 1605 }, { "epoch": 0.21, "grad_norm": 0.302734375, "learning_rate": 4.9864122016239955e-05, "loss": 2.3272, "step": 1606 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.986390154601366e-05, "loss": 2.3152, "step": 1607 }, { "epoch": 0.21, "grad_norm": 0.298828125, "learning_rate": 4.9863680897557385e-05, "loss": 2.3085, "step": 1608 }, { "epoch": 0.21, "grad_norm": 0.31640625, "learning_rate": 4.9863460070872716e-05, "loss": 2.3222, "step": 1609 }, { "epoch": 0.21, "grad_norm": 0.3203125, "learning_rate": 4.986323906596124e-05, "loss": 2.361, "step": 1610 }, { "epoch": 0.21, "grad_norm": 0.328125, "learning_rate": 4.986301788282454e-05, "loss": 2.3217, "step": 1611 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.986279652146421e-05, "loss": 2.3044, "step": 1612 }, { "epoch": 0.22, "grad_norm": 0.314453125, "learning_rate": 4.9862574981881814e-05, "loss": 2.3176, "step": 1613 }, { "epoch": 0.22, "grad_norm": 0.306640625, "learning_rate": 4.986235326407896e-05, "loss": 2.3178, "step": 1614 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 4.986213136805723e-05, "loss": 2.2839, "step": 1615 }, { "epoch": 0.22, "grad_norm": 0.3125, "learning_rate": 4.9861909293818215e-05, "loss": 2.3272, "step": 1616 }, { "epoch": 0.22, "grad_norm": 0.333984375, "learning_rate": 4.986168704136351e-05, "loss": 2.2887, "step": 1617 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.98614646106947e-05, "loss": 2.3208, "step": 1618 }, { "epoch": 0.22, "grad_norm": 0.306640625, "learning_rate": 4.9861242001813394e-05, "loss": 2.2828, "step": 1619 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.986101921472117e-05, "loss": 2.3442, "step": 1620 }, { "epoch": 0.22, "grad_norm": 0.310546875, "learning_rate": 4.986079624941964e-05, "loss": 2.3006, "step": 1621 }, { "epoch": 0.22, "grad_norm": 0.328125, "learning_rate": 4.986057310591039e-05, "loss": 2.2945, "step": 1622 }, { "epoch": 0.22, "grad_norm": 0.326171875, "learning_rate": 4.986034978419504e-05, "loss": 2.33, "step": 1623 }, { "epoch": 0.22, "grad_norm": 0.279296875, "learning_rate": 4.986012628427516e-05, "loss": 2.3275, "step": 1624 }, { "epoch": 0.22, "grad_norm": 0.30078125, "learning_rate": 4.9859902606152375e-05, "loss": 2.3029, "step": 1625 }, { "epoch": 0.22, "grad_norm": 0.333984375, "learning_rate": 4.985967874982828e-05, "loss": 2.3071, "step": 1626 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.985945471530449e-05, "loss": 2.3046, "step": 1627 }, { "epoch": 0.22, "grad_norm": 0.30859375, "learning_rate": 4.985923050258259e-05, "loss": 2.3044, "step": 1628 }, { "epoch": 0.22, "grad_norm": 0.3046875, "learning_rate": 4.9859006111664206e-05, "loss": 2.3137, "step": 1629 }, { "epoch": 0.22, "grad_norm": 0.30859375, "learning_rate": 4.985878154255094e-05, "loss": 2.2932, "step": 1630 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.98585567952444e-05, "loss": 2.3001, "step": 1631 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.9858331869746205e-05, "loss": 2.293, "step": 1632 }, { "epoch": 0.22, "grad_norm": 0.306640625, "learning_rate": 4.985810676605795e-05, "loss": 2.3292, "step": 1633 }, { "epoch": 0.22, "grad_norm": 0.30078125, "learning_rate": 4.9857881484181275e-05, "loss": 2.3359, "step": 1634 }, { "epoch": 0.22, "grad_norm": 0.30078125, "learning_rate": 4.9857656024117774e-05, "loss": 2.3275, "step": 1635 }, { "epoch": 0.22, "grad_norm": 0.310546875, "learning_rate": 4.985743038586906e-05, "loss": 2.325, "step": 1636 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.985720456943677e-05, "loss": 2.2974, "step": 1637 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.985697857482251e-05, "loss": 2.3425, "step": 1638 }, { "epoch": 0.22, "grad_norm": 0.283203125, "learning_rate": 4.98567524020279e-05, "loss": 2.2915, "step": 1639 }, { "epoch": 0.22, "grad_norm": 0.3125, "learning_rate": 4.985652605105456e-05, "loss": 2.3206, "step": 1640 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.9856299521904124e-05, "loss": 2.2977, "step": 1641 }, { "epoch": 0.22, "grad_norm": 0.28515625, "learning_rate": 4.9856072814578204e-05, "loss": 2.3072, "step": 1642 }, { "epoch": 0.22, "grad_norm": 0.30078125, "learning_rate": 4.985584592907842e-05, "loss": 2.3117, "step": 1643 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 4.985561886540643e-05, "loss": 2.2989, "step": 1644 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.985539162356382e-05, "loss": 2.3218, "step": 1645 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.985516420355224e-05, "loss": 2.312, "step": 1646 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.9854936605373324e-05, "loss": 2.3626, "step": 1647 }, { "epoch": 0.22, "grad_norm": 0.296875, "learning_rate": 4.985470882902869e-05, "loss": 2.3061, "step": 1648 }, { "epoch": 0.22, "grad_norm": 0.279296875, "learning_rate": 4.985448087451998e-05, "loss": 2.3037, "step": 1649 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.9854252741848826e-05, "loss": 2.3099, "step": 1650 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.985402443101687e-05, "loss": 2.3008, "step": 1651 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.985379594202574e-05, "loss": 2.2785, "step": 1652 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 4.985356727487707e-05, "loss": 2.3201, "step": 1653 }, { "epoch": 0.22, "grad_norm": 0.27734375, "learning_rate": 4.9853338429572505e-05, "loss": 2.2755, "step": 1654 }, { "epoch": 0.22, "grad_norm": 0.306640625, "learning_rate": 4.985310940611369e-05, "loss": 2.332, "step": 1655 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 4.985288020450226e-05, "loss": 2.3097, "step": 1656 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.985265082473987e-05, "loss": 2.3292, "step": 1657 }, { "epoch": 0.22, "grad_norm": 0.3046875, "learning_rate": 4.985242126682814e-05, "loss": 2.3339, "step": 1658 }, { "epoch": 0.22, "grad_norm": 0.283203125, "learning_rate": 4.985219153076873e-05, "loss": 2.3002, "step": 1659 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.9851961616563295e-05, "loss": 2.3326, "step": 1660 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.9851731524213465e-05, "loss": 2.3249, "step": 1661 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.985150125372091e-05, "loss": 2.3354, "step": 1662 }, { "epoch": 0.22, "grad_norm": 0.287109375, "learning_rate": 4.985127080508727e-05, "loss": 2.2756, "step": 1663 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.985104017831419e-05, "loss": 2.3052, "step": 1664 }, { "epoch": 0.22, "grad_norm": 0.296875, "learning_rate": 4.9850809373403326e-05, "loss": 2.3169, "step": 1665 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.9850578390356347e-05, "loss": 2.3016, "step": 1666 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.985034722917489e-05, "loss": 2.2934, "step": 1667 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.985011588986063e-05, "loss": 2.3017, "step": 1668 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.98498843724152e-05, "loss": 2.3243, "step": 1669 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.9849652676840284e-05, "loss": 2.3091, "step": 1670 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.984942080313754e-05, "loss": 2.2973, "step": 1671 }, { "epoch": 0.22, "grad_norm": 0.30078125, "learning_rate": 4.984918875130861e-05, "loss": 2.3243, "step": 1672 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.984895652135518e-05, "loss": 2.2982, "step": 1673 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.98487241132789e-05, "loss": 2.3165, "step": 1674 }, { "epoch": 0.22, "grad_norm": 0.28515625, "learning_rate": 4.984849152708145e-05, "loss": 2.3423, "step": 1675 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.9848258762764486e-05, "loss": 2.3147, "step": 1676 }, { "epoch": 0.22, "grad_norm": 0.310546875, "learning_rate": 4.984802582032968e-05, "loss": 2.304, "step": 1677 }, { "epoch": 0.22, "grad_norm": 0.3125, "learning_rate": 4.9847792699778694e-05, "loss": 2.3361, "step": 1678 }, { "epoch": 0.22, "grad_norm": 0.318359375, "learning_rate": 4.984755940111321e-05, "loss": 2.3129, "step": 1679 }, { "epoch": 0.22, "grad_norm": 0.296875, "learning_rate": 4.98473259243349e-05, "loss": 2.2839, "step": 1680 }, { "epoch": 0.22, "grad_norm": 0.294921875, "learning_rate": 4.984709226944544e-05, "loss": 2.2878, "step": 1681 }, { "epoch": 0.22, "grad_norm": 0.302734375, "learning_rate": 4.984685843644649e-05, "loss": 2.3526, "step": 1682 }, { "epoch": 0.22, "grad_norm": 0.29296875, "learning_rate": 4.984662442533974e-05, "loss": 2.3174, "step": 1683 }, { "epoch": 0.22, "grad_norm": 0.298828125, "learning_rate": 4.9846390236126863e-05, "loss": 2.2837, "step": 1684 }, { "epoch": 0.22, "grad_norm": 0.32421875, "learning_rate": 4.984615586880953e-05, "loss": 2.2962, "step": 1685 }, { "epoch": 0.22, "grad_norm": 0.2890625, "learning_rate": 4.984592132338944e-05, "loss": 2.3383, "step": 1686 }, { "epoch": 0.23, "grad_norm": 0.33203125, "learning_rate": 4.984568659986826e-05, "loss": 2.3093, "step": 1687 }, { "epoch": 0.23, "grad_norm": 0.3125, "learning_rate": 4.984545169824767e-05, "loss": 2.3049, "step": 1688 }, { "epoch": 0.23, "grad_norm": 0.30078125, "learning_rate": 4.984521661852936e-05, "loss": 2.3462, "step": 1689 }, { "epoch": 0.23, "grad_norm": 0.287109375, "learning_rate": 4.9844981360715014e-05, "loss": 2.3094, "step": 1690 }, { "epoch": 0.23, "grad_norm": 0.291015625, "learning_rate": 4.9844745924806324e-05, "loss": 2.3219, "step": 1691 }, { "epoch": 0.23, "grad_norm": 0.30078125, "learning_rate": 4.984451031080497e-05, "loss": 2.2896, "step": 1692 }, { "epoch": 0.23, "grad_norm": 0.291015625, "learning_rate": 4.984427451871264e-05, "loss": 2.3118, "step": 1693 }, { "epoch": 0.23, "grad_norm": 0.302734375, "learning_rate": 4.984403854853104e-05, "loss": 2.3378, "step": 1694 }, { "epoch": 0.23, "grad_norm": 0.31640625, "learning_rate": 4.984380240026183e-05, "loss": 2.3206, "step": 1695 }, { "epoch": 0.23, "grad_norm": 0.302734375, "learning_rate": 4.9843566073906734e-05, "loss": 2.3046, "step": 1696 }, { "epoch": 0.23, "grad_norm": 0.298828125, "learning_rate": 4.984332956946744e-05, "loss": 2.2761, "step": 1697 }, { "epoch": 0.23, "grad_norm": 0.3046875, "learning_rate": 4.984309288694562e-05, "loss": 2.3253, "step": 1698 }, { "epoch": 0.23, "grad_norm": 0.30859375, "learning_rate": 4.9842856026343e-05, "loss": 2.3319, "step": 1699 }, { "epoch": 0.23, "grad_norm": 0.283203125, "learning_rate": 4.984261898766126e-05, "loss": 2.3165, "step": 1700 }, { "epoch": 0.23, "grad_norm": 0.291015625, "learning_rate": 4.984238177090211e-05, "loss": 2.2917, "step": 1701 }, { "epoch": 0.23, "grad_norm": 0.30859375, "learning_rate": 4.984214437606725e-05, "loss": 2.301, "step": 1702 }, { "epoch": 0.23, "grad_norm": 0.302734375, "learning_rate": 4.984190680315836e-05, "loss": 2.2864, "step": 1703 }, { "epoch": 0.23, "grad_norm": 0.302734375, "learning_rate": 4.984166905217717e-05, "loss": 2.3107, "step": 1704 }, { "epoch": 0.23, "grad_norm": 0.2890625, "learning_rate": 4.984143112312537e-05, "loss": 2.2932, "step": 1705 }, { "epoch": 0.23, "grad_norm": 0.296875, "learning_rate": 4.9841193016004675e-05, "loss": 2.3225, "step": 1706 }, { "epoch": 0.23, "grad_norm": 0.3125, "learning_rate": 4.984095473081679e-05, "loss": 2.2975, "step": 1707 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.9840716267563406e-05, "loss": 2.3011, "step": 1708 }, { "epoch": 0.23, "grad_norm": 0.3046875, "learning_rate": 4.9840477626246256e-05, "loss": 2.3182, "step": 1709 }, { "epoch": 0.23, "grad_norm": 0.2890625, "learning_rate": 4.984023880686704e-05, "loss": 2.3309, "step": 1710 }, { "epoch": 0.23, "grad_norm": 0.28515625, "learning_rate": 4.983999980942747e-05, "loss": 2.3062, "step": 1711 }, { "epoch": 0.23, "grad_norm": 0.30859375, "learning_rate": 4.983976063392926e-05, "loss": 2.3153, "step": 1712 }, { "epoch": 0.23, "grad_norm": 0.337890625, "learning_rate": 4.983952128037412e-05, "loss": 2.3025, "step": 1713 }, { "epoch": 0.23, "grad_norm": 0.296875, "learning_rate": 4.983928174876377e-05, "loss": 2.3103, "step": 1714 }, { "epoch": 0.23, "grad_norm": 0.30078125, "learning_rate": 4.983904203909993e-05, "loss": 2.3175, "step": 1715 }, { "epoch": 0.23, "grad_norm": 0.314453125, "learning_rate": 4.983880215138431e-05, "loss": 2.317, "step": 1716 }, { "epoch": 0.23, "grad_norm": 0.318359375, "learning_rate": 4.983856208561864e-05, "loss": 2.2934, "step": 1717 }, { "epoch": 0.23, "grad_norm": 0.326171875, "learning_rate": 4.983832184180464e-05, "loss": 2.3004, "step": 1718 }, { "epoch": 0.23, "grad_norm": 0.3125, "learning_rate": 4.983808141994402e-05, "loss": 2.3213, "step": 1719 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.983784082003852e-05, "loss": 2.2908, "step": 1720 }, { "epoch": 0.23, "grad_norm": 0.30859375, "learning_rate": 4.9837600042089845e-05, "loss": 2.3056, "step": 1721 }, { "epoch": 0.23, "grad_norm": 0.314453125, "learning_rate": 4.983735908609973e-05, "loss": 2.3121, "step": 1722 }, { "epoch": 0.23, "grad_norm": 0.322265625, "learning_rate": 4.983711795206991e-05, "loss": 2.2917, "step": 1723 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.9836876640002115e-05, "loss": 2.291, "step": 1724 }, { "epoch": 0.23, "grad_norm": 0.294921875, "learning_rate": 4.983663514989806e-05, "loss": 2.2692, "step": 1725 }, { "epoch": 0.23, "grad_norm": 0.478515625, "learning_rate": 4.983639348175948e-05, "loss": 2.3221, "step": 1726 }, { "epoch": 0.23, "grad_norm": 0.3046875, "learning_rate": 4.9836151635588114e-05, "loss": 2.3115, "step": 1727 }, { "epoch": 0.23, "grad_norm": 0.294921875, "learning_rate": 4.983590961138569e-05, "loss": 2.3121, "step": 1728 }, { "epoch": 0.23, "grad_norm": 0.33984375, "learning_rate": 4.9835667409153943e-05, "loss": 2.2999, "step": 1729 }, { "epoch": 0.23, "grad_norm": 0.33984375, "learning_rate": 4.9835425028894614e-05, "loss": 2.2806, "step": 1730 }, { "epoch": 0.23, "grad_norm": 0.294921875, "learning_rate": 4.983518247060944e-05, "loss": 2.2924, "step": 1731 }, { "epoch": 0.23, "grad_norm": 0.306640625, "learning_rate": 4.983493973430016e-05, "loss": 2.2813, "step": 1732 }, { "epoch": 0.23, "grad_norm": 0.32421875, "learning_rate": 4.9834696819968505e-05, "loss": 2.3125, "step": 1733 }, { "epoch": 0.23, "grad_norm": 0.314453125, "learning_rate": 4.9834453727616227e-05, "loss": 2.3052, "step": 1734 }, { "epoch": 0.23, "grad_norm": 0.31640625, "learning_rate": 4.983421045724505e-05, "loss": 2.297, "step": 1735 }, { "epoch": 0.23, "grad_norm": 0.314453125, "learning_rate": 4.983396700885675e-05, "loss": 2.3055, "step": 1736 }, { "epoch": 0.23, "grad_norm": 0.3046875, "learning_rate": 4.983372338245305e-05, "loss": 2.2934, "step": 1737 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.98334795780357e-05, "loss": 2.269, "step": 1738 }, { "epoch": 0.23, "grad_norm": 0.3046875, "learning_rate": 4.983323559560645e-05, "loss": 2.3184, "step": 1739 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.983299143516704e-05, "loss": 2.3375, "step": 1740 }, { "epoch": 0.23, "grad_norm": 0.31640625, "learning_rate": 4.983274709671923e-05, "loss": 2.3072, "step": 1741 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.983250258026477e-05, "loss": 2.334, "step": 1742 }, { "epoch": 0.23, "grad_norm": 0.3125, "learning_rate": 4.983225788580541e-05, "loss": 2.2868, "step": 1743 }, { "epoch": 0.23, "grad_norm": 0.34765625, "learning_rate": 4.983201301334291e-05, "loss": 2.299, "step": 1744 }, { "epoch": 0.23, "grad_norm": 0.287109375, "learning_rate": 4.983176796287902e-05, "loss": 2.2853, "step": 1745 }, { "epoch": 0.23, "grad_norm": 0.310546875, "learning_rate": 4.983152273441549e-05, "loss": 2.3314, "step": 1746 }, { "epoch": 0.23, "grad_norm": 0.296875, "learning_rate": 4.983127732795409e-05, "loss": 2.3124, "step": 1747 }, { "epoch": 0.23, "grad_norm": 0.333984375, "learning_rate": 4.9831031743496576e-05, "loss": 2.2995, "step": 1748 }, { "epoch": 0.23, "grad_norm": 0.322265625, "learning_rate": 4.98307859810447e-05, "loss": 2.3092, "step": 1749 }, { "epoch": 0.23, "grad_norm": 0.3203125, "learning_rate": 4.983054004060024e-05, "loss": 2.3014, "step": 1750 }, { "epoch": 0.23, "grad_norm": 0.294921875, "learning_rate": 4.983029392216494e-05, "loss": 2.2992, "step": 1751 }, { "epoch": 0.23, "grad_norm": 0.283203125, "learning_rate": 4.9830047625740586e-05, "loss": 2.311, "step": 1752 }, { "epoch": 0.23, "grad_norm": 0.3125, "learning_rate": 4.982980115132892e-05, "loss": 2.2768, "step": 1753 }, { "epoch": 0.23, "grad_norm": 0.318359375, "learning_rate": 4.982955449893172e-05, "loss": 2.34, "step": 1754 }, { "epoch": 0.23, "grad_norm": 0.322265625, "learning_rate": 4.982930766855076e-05, "loss": 2.3178, "step": 1755 }, { "epoch": 0.23, "grad_norm": 0.302734375, "learning_rate": 4.98290606601878e-05, "loss": 2.3172, "step": 1756 }, { "epoch": 0.23, "grad_norm": 0.298828125, "learning_rate": 4.982881347384462e-05, "loss": 2.3105, "step": 1757 }, { "epoch": 0.23, "grad_norm": 0.322265625, "learning_rate": 4.982856610952298e-05, "loss": 2.2906, "step": 1758 }, { "epoch": 0.23, "grad_norm": 0.33984375, "learning_rate": 4.9828318567224654e-05, "loss": 2.2959, "step": 1759 }, { "epoch": 0.23, "grad_norm": 0.294921875, "learning_rate": 4.9828070846951425e-05, "loss": 2.3036, "step": 1760 }, { "epoch": 0.23, "grad_norm": 0.306640625, "learning_rate": 4.9827822948705074e-05, "loss": 2.3058, "step": 1761 }, { "epoch": 0.24, "grad_norm": 0.3046875, "learning_rate": 4.982757487248736e-05, "loss": 2.3269, "step": 1762 }, { "epoch": 0.24, "grad_norm": 0.31640625, "learning_rate": 4.9827326618300076e-05, "loss": 2.2957, "step": 1763 }, { "epoch": 0.24, "grad_norm": 0.32421875, "learning_rate": 4.982707818614498e-05, "loss": 2.2656, "step": 1764 }, { "epoch": 0.24, "grad_norm": 0.291015625, "learning_rate": 4.982682957602389e-05, "loss": 2.3233, "step": 1765 }, { "epoch": 0.24, "grad_norm": 0.283203125, "learning_rate": 4.9826580787938556e-05, "loss": 2.2983, "step": 1766 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.982633182189077e-05, "loss": 2.3138, "step": 1767 }, { "epoch": 0.24, "grad_norm": 0.30078125, "learning_rate": 4.982608267788232e-05, "loss": 2.3112, "step": 1768 }, { "epoch": 0.24, "grad_norm": 0.294921875, "learning_rate": 4.9825833355914995e-05, "loss": 2.2961, "step": 1769 }, { "epoch": 0.24, "grad_norm": 0.314453125, "learning_rate": 4.982558385599058e-05, "loss": 2.3402, "step": 1770 }, { "epoch": 0.24, "grad_norm": 0.31640625, "learning_rate": 4.9825334178110854e-05, "loss": 2.3011, "step": 1771 }, { "epoch": 0.24, "grad_norm": 0.318359375, "learning_rate": 4.982508432227762e-05, "loss": 2.3111, "step": 1772 }, { "epoch": 0.24, "grad_norm": 0.32421875, "learning_rate": 4.982483428849266e-05, "loss": 2.2812, "step": 1773 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.9824584076757774e-05, "loss": 2.3419, "step": 1774 }, { "epoch": 0.24, "grad_norm": 0.283203125, "learning_rate": 4.982433368707474e-05, "loss": 2.2834, "step": 1775 }, { "epoch": 0.24, "grad_norm": 0.318359375, "learning_rate": 4.982408311944538e-05, "loss": 2.3404, "step": 1776 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.982383237387146e-05, "loss": 2.312, "step": 1777 }, { "epoch": 0.24, "grad_norm": 0.30859375, "learning_rate": 4.98235814503548e-05, "loss": 2.3213, "step": 1778 }, { "epoch": 0.24, "grad_norm": 0.302734375, "learning_rate": 4.9823330348897184e-05, "loss": 2.2976, "step": 1779 }, { "epoch": 0.24, "grad_norm": 0.306640625, "learning_rate": 4.9823079069500425e-05, "loss": 2.2943, "step": 1780 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.982282761216631e-05, "loss": 2.312, "step": 1781 }, { "epoch": 0.24, "grad_norm": 0.318359375, "learning_rate": 4.982257597689665e-05, "loss": 2.3087, "step": 1782 }, { "epoch": 0.24, "grad_norm": 0.330078125, "learning_rate": 4.982232416369325e-05, "loss": 2.3031, "step": 1783 }, { "epoch": 0.24, "grad_norm": 0.310546875, "learning_rate": 4.982207217255791e-05, "loss": 2.2911, "step": 1784 }, { "epoch": 0.24, "grad_norm": 0.29296875, "learning_rate": 4.9821820003492445e-05, "loss": 2.2971, "step": 1785 }, { "epoch": 0.24, "grad_norm": 0.3046875, "learning_rate": 4.9821567656498646e-05, "loss": 2.2805, "step": 1786 }, { "epoch": 0.24, "grad_norm": 0.318359375, "learning_rate": 4.9821315131578347e-05, "loss": 2.3114, "step": 1787 }, { "epoch": 0.24, "grad_norm": 0.314453125, "learning_rate": 4.9821062428733335e-05, "loss": 2.2857, "step": 1788 }, { "epoch": 0.24, "grad_norm": 0.306640625, "learning_rate": 4.982080954796543e-05, "loss": 2.3175, "step": 1789 }, { "epoch": 0.24, "grad_norm": 0.296875, "learning_rate": 4.982055648927644e-05, "loss": 2.3136, "step": 1790 }, { "epoch": 0.24, "grad_norm": 0.283203125, "learning_rate": 4.982030325266819e-05, "loss": 2.2918, "step": 1791 }, { "epoch": 0.24, "grad_norm": 0.291015625, "learning_rate": 4.982004983814249e-05, "loss": 2.3156, "step": 1792 }, { "epoch": 0.24, "grad_norm": 0.296875, "learning_rate": 4.9819796245701154e-05, "loss": 2.3275, "step": 1793 }, { "epoch": 0.24, "grad_norm": 0.3125, "learning_rate": 4.9819542475346e-05, "loss": 2.305, "step": 1794 }, { "epoch": 0.24, "grad_norm": 0.306640625, "learning_rate": 4.981928852707884e-05, "loss": 2.3208, "step": 1795 }, { "epoch": 0.24, "grad_norm": 0.296875, "learning_rate": 4.981903440090152e-05, "loss": 2.3261, "step": 1796 }, { "epoch": 0.24, "grad_norm": 0.27734375, "learning_rate": 4.981878009681583e-05, "loss": 2.2946, "step": 1797 }, { "epoch": 0.24, "grad_norm": 0.310546875, "learning_rate": 4.981852561482362e-05, "loss": 2.3413, "step": 1798 }, { "epoch": 0.24, "grad_norm": 0.3125, "learning_rate": 4.981827095492669e-05, "loss": 2.3366, "step": 1799 }, { "epoch": 0.24, "grad_norm": 0.287109375, "learning_rate": 4.981801611712689e-05, "loss": 2.3033, "step": 1800 }, { "epoch": 0.24, "grad_norm": 0.3125, "learning_rate": 4.981776110142602e-05, "loss": 2.2831, "step": 1801 }, { "epoch": 0.24, "grad_norm": 0.30859375, "learning_rate": 4.981750590782592e-05, "loss": 2.3133, "step": 1802 }, { "epoch": 0.24, "grad_norm": 0.294921875, "learning_rate": 4.981725053632843e-05, "loss": 2.3094, "step": 1803 }, { "epoch": 0.24, "grad_norm": 0.2890625, "learning_rate": 4.981699498693537e-05, "loss": 2.307, "step": 1804 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.981673925964858e-05, "loss": 2.3079, "step": 1805 }, { "epoch": 0.24, "grad_norm": 0.291015625, "learning_rate": 4.981648335446987e-05, "loss": 2.2968, "step": 1806 }, { "epoch": 0.24, "grad_norm": 0.296875, "learning_rate": 4.9816227271401106e-05, "loss": 2.3225, "step": 1807 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.98159710104441e-05, "loss": 2.3032, "step": 1808 }, { "epoch": 0.24, "grad_norm": 0.388671875, "learning_rate": 4.98157145716007e-05, "loss": 2.3001, "step": 1809 }, { "epoch": 0.24, "grad_norm": 0.322265625, "learning_rate": 4.9815457954872744e-05, "loss": 2.3149, "step": 1810 }, { "epoch": 0.24, "grad_norm": 0.283203125, "learning_rate": 4.981520116026206e-05, "loss": 2.2906, "step": 1811 }, { "epoch": 0.24, "grad_norm": 0.2890625, "learning_rate": 4.9814944187770504e-05, "loss": 2.3062, "step": 1812 }, { "epoch": 0.24, "grad_norm": 0.3203125, "learning_rate": 4.9814687037399913e-05, "loss": 2.2866, "step": 1813 }, { "epoch": 0.24, "grad_norm": 0.29296875, "learning_rate": 4.981442970915213e-05, "loss": 2.3278, "step": 1814 }, { "epoch": 0.24, "grad_norm": 0.310546875, "learning_rate": 4.9814172203029e-05, "loss": 2.3222, "step": 1815 }, { "epoch": 0.24, "grad_norm": 0.3125, "learning_rate": 4.981391451903236e-05, "loss": 2.2911, "step": 1816 }, { "epoch": 0.24, "grad_norm": 0.3125, "learning_rate": 4.981365665716407e-05, "loss": 2.3192, "step": 1817 }, { "epoch": 0.24, "grad_norm": 0.30078125, "learning_rate": 4.981339861742598e-05, "loss": 2.3334, "step": 1818 }, { "epoch": 0.24, "grad_norm": 0.302734375, "learning_rate": 4.981314039981992e-05, "loss": 2.3222, "step": 1819 }, { "epoch": 0.24, "grad_norm": 0.322265625, "learning_rate": 4.981288200434776e-05, "loss": 2.288, "step": 1820 }, { "epoch": 0.24, "grad_norm": 0.30859375, "learning_rate": 4.981262343101135e-05, "loss": 2.2795, "step": 1821 }, { "epoch": 0.24, "grad_norm": 0.283203125, "learning_rate": 4.981236467981253e-05, "loss": 2.281, "step": 1822 }, { "epoch": 0.24, "grad_norm": 0.296875, "learning_rate": 4.981210575075317e-05, "loss": 2.3098, "step": 1823 }, { "epoch": 0.24, "grad_norm": 0.306640625, "learning_rate": 4.981184664383512e-05, "loss": 2.3317, "step": 1824 }, { "epoch": 0.24, "grad_norm": 0.2890625, "learning_rate": 4.981158735906024e-05, "loss": 2.2812, "step": 1825 }, { "epoch": 0.24, "grad_norm": 0.30859375, "learning_rate": 4.981132789643038e-05, "loss": 2.2776, "step": 1826 }, { "epoch": 0.24, "grad_norm": 0.318359375, "learning_rate": 4.981106825594741e-05, "loss": 2.3103, "step": 1827 }, { "epoch": 0.24, "grad_norm": 0.3359375, "learning_rate": 4.9810808437613185e-05, "loss": 2.2978, "step": 1828 }, { "epoch": 0.24, "grad_norm": 0.298828125, "learning_rate": 4.9810548441429586e-05, "loss": 2.3153, "step": 1829 }, { "epoch": 0.24, "grad_norm": 0.287109375, "learning_rate": 4.981028826739844e-05, "loss": 2.2907, "step": 1830 }, { "epoch": 0.24, "grad_norm": 0.2890625, "learning_rate": 4.981002791552164e-05, "loss": 2.2898, "step": 1831 }, { "epoch": 0.24, "grad_norm": 0.30859375, "learning_rate": 4.9809767385801046e-05, "loss": 2.2913, "step": 1832 }, { "epoch": 0.24, "grad_norm": 0.330078125, "learning_rate": 4.980950667823853e-05, "loss": 2.3049, "step": 1833 }, { "epoch": 0.24, "grad_norm": 0.310546875, "learning_rate": 4.980924579283594e-05, "loss": 2.3285, "step": 1834 }, { "epoch": 0.24, "grad_norm": 0.3046875, "learning_rate": 4.980898472959517e-05, "loss": 2.2928, "step": 1835 }, { "epoch": 0.24, "grad_norm": 0.3046875, "learning_rate": 4.9808723488518086e-05, "loss": 2.3413, "step": 1836 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.980846206960656e-05, "loss": 2.319, "step": 1837 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.980820047286245e-05, "loss": 2.3251, "step": 1838 }, { "epoch": 0.25, "grad_norm": 0.3125, "learning_rate": 4.980793869828765e-05, "loss": 2.3015, "step": 1839 }, { "epoch": 0.25, "grad_norm": 0.30859375, "learning_rate": 4.980767674588403e-05, "loss": 2.3298, "step": 1840 }, { "epoch": 0.25, "grad_norm": 0.345703125, "learning_rate": 4.980741461565347e-05, "loss": 2.2896, "step": 1841 }, { "epoch": 0.25, "grad_norm": 0.2890625, "learning_rate": 4.980715230759785e-05, "loss": 2.2989, "step": 1842 }, { "epoch": 0.25, "grad_norm": 0.28515625, "learning_rate": 4.9806889821719045e-05, "loss": 2.2742, "step": 1843 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.9806627158018935e-05, "loss": 2.2966, "step": 1844 }, { "epoch": 0.25, "grad_norm": 0.310546875, "learning_rate": 4.9806364316499424e-05, "loss": 2.3193, "step": 1845 }, { "epoch": 0.25, "grad_norm": 0.287109375, "learning_rate": 4.980610129716236e-05, "loss": 2.3132, "step": 1846 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.980583810000966e-05, "loss": 2.3008, "step": 1847 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.980557472504319e-05, "loss": 2.2912, "step": 1848 }, { "epoch": 0.25, "grad_norm": 0.296875, "learning_rate": 4.980531117226485e-05, "loss": 2.3011, "step": 1849 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.9805047441676525e-05, "loss": 2.3516, "step": 1850 }, { "epoch": 0.25, "grad_norm": 0.298828125, "learning_rate": 4.980478353328011e-05, "loss": 2.3114, "step": 1851 }, { "epoch": 0.25, "grad_norm": 0.291015625, "learning_rate": 4.9804519447077494e-05, "loss": 2.2961, "step": 1852 }, { "epoch": 0.25, "grad_norm": 0.310546875, "learning_rate": 4.9804255183070556e-05, "loss": 2.2927, "step": 1853 }, { "epoch": 0.25, "grad_norm": 0.296875, "learning_rate": 4.980399074126121e-05, "loss": 2.2962, "step": 1854 }, { "epoch": 0.25, "grad_norm": 0.283203125, "learning_rate": 4.980372612165135e-05, "loss": 2.3161, "step": 1855 }, { "epoch": 0.25, "grad_norm": 0.310546875, "learning_rate": 4.980346132424286e-05, "loss": 2.3179, "step": 1856 }, { "epoch": 0.25, "grad_norm": 0.29296875, "learning_rate": 4.980319634903765e-05, "loss": 2.3163, "step": 1857 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.980293119603762e-05, "loss": 2.286, "step": 1858 }, { "epoch": 0.25, "grad_norm": 0.283203125, "learning_rate": 4.980266586524466e-05, "loss": 2.3245, "step": 1859 }, { "epoch": 0.25, "grad_norm": 0.27734375, "learning_rate": 4.9802400356660674e-05, "loss": 2.3126, "step": 1860 }, { "epoch": 0.25, "grad_norm": 0.287109375, "learning_rate": 4.980213467028757e-05, "loss": 2.2974, "step": 1861 }, { "epoch": 0.25, "grad_norm": 0.3203125, "learning_rate": 4.9801868806127254e-05, "loss": 2.3102, "step": 1862 }, { "epoch": 0.25, "grad_norm": 0.30859375, "learning_rate": 4.980160276418163e-05, "loss": 2.2972, "step": 1863 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.9801336544452604e-05, "loss": 2.3188, "step": 1864 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.980107014694208e-05, "loss": 2.3342, "step": 1865 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.980080357165198e-05, "loss": 2.3027, "step": 1866 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.98005368185842e-05, "loss": 2.3314, "step": 1867 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.980026988774066e-05, "loss": 2.3, "step": 1868 }, { "epoch": 0.25, "grad_norm": 0.322265625, "learning_rate": 4.9800002779123276e-05, "loss": 2.2944, "step": 1869 }, { "epoch": 0.25, "grad_norm": 0.279296875, "learning_rate": 4.979973549273396e-05, "loss": 2.337, "step": 1870 }, { "epoch": 0.25, "grad_norm": 0.28515625, "learning_rate": 4.9799468028574625e-05, "loss": 2.3215, "step": 1871 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.9799200386647185e-05, "loss": 2.2852, "step": 1872 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.979893256695357e-05, "loss": 2.3052, "step": 1873 }, { "epoch": 0.25, "grad_norm": 0.296875, "learning_rate": 4.979866456949569e-05, "loss": 2.3072, "step": 1874 }, { "epoch": 0.25, "grad_norm": 0.296875, "learning_rate": 4.9798396394275477e-05, "loss": 2.2669, "step": 1875 }, { "epoch": 0.25, "grad_norm": 0.314453125, "learning_rate": 4.9798128041294845e-05, "loss": 2.315, "step": 1876 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.979785951055571e-05, "loss": 2.2941, "step": 1877 }, { "epoch": 0.25, "grad_norm": 0.2890625, "learning_rate": 4.979759080206001e-05, "loss": 2.3155, "step": 1878 }, { "epoch": 0.25, "grad_norm": 0.28125, "learning_rate": 4.979732191580967e-05, "loss": 2.3131, "step": 1879 }, { "epoch": 0.25, "grad_norm": 0.287109375, "learning_rate": 4.9797052851806615e-05, "loss": 2.3214, "step": 1880 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.9796783610052764e-05, "loss": 2.3002, "step": 1881 }, { "epoch": 0.25, "grad_norm": 0.296875, "learning_rate": 4.979651419055007e-05, "loss": 2.3235, "step": 1882 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.979624459330044e-05, "loss": 2.3013, "step": 1883 }, { "epoch": 0.25, "grad_norm": 0.291015625, "learning_rate": 4.979597481830582e-05, "loss": 2.2562, "step": 1884 }, { "epoch": 0.25, "grad_norm": 0.298828125, "learning_rate": 4.9795704865568135e-05, "loss": 2.287, "step": 1885 }, { "epoch": 0.25, "grad_norm": 0.298828125, "learning_rate": 4.979543473508934e-05, "loss": 2.3154, "step": 1886 }, { "epoch": 0.25, "grad_norm": 0.3203125, "learning_rate": 4.979516442687134e-05, "loss": 2.3025, "step": 1887 }, { "epoch": 0.25, "grad_norm": 0.30859375, "learning_rate": 4.97948939409161e-05, "loss": 2.2844, "step": 1888 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.979462327722555e-05, "loss": 2.3329, "step": 1889 }, { "epoch": 0.25, "grad_norm": 0.291015625, "learning_rate": 4.979435243580162e-05, "loss": 2.2951, "step": 1890 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.9794081416646263e-05, "loss": 2.3367, "step": 1891 }, { "epoch": 0.25, "grad_norm": 0.318359375, "learning_rate": 4.979381021976142e-05, "loss": 2.2901, "step": 1892 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.979353884514903e-05, "loss": 2.2805, "step": 1893 }, { "epoch": 0.25, "grad_norm": 0.3125, "learning_rate": 4.979326729281105e-05, "loss": 2.3128, "step": 1894 }, { "epoch": 0.25, "grad_norm": 0.3203125, "learning_rate": 4.979299556274942e-05, "loss": 2.3264, "step": 1895 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.9792723654966076e-05, "loss": 2.2853, "step": 1896 }, { "epoch": 0.25, "grad_norm": 0.283203125, "learning_rate": 4.979245156946299e-05, "loss": 2.3051, "step": 1897 }, { "epoch": 0.25, "grad_norm": 0.294921875, "learning_rate": 4.9792179306242096e-05, "loss": 2.3011, "step": 1898 }, { "epoch": 0.25, "grad_norm": 0.2890625, "learning_rate": 4.979190686530534e-05, "loss": 2.2913, "step": 1899 }, { "epoch": 0.25, "grad_norm": 0.306640625, "learning_rate": 4.979163424665469e-05, "loss": 2.2994, "step": 1900 }, { "epoch": 0.25, "grad_norm": 0.3203125, "learning_rate": 4.97913614502921e-05, "loss": 2.3286, "step": 1901 }, { "epoch": 0.25, "grad_norm": 0.298828125, "learning_rate": 4.979108847621951e-05, "loss": 2.3132, "step": 1902 }, { "epoch": 0.25, "grad_norm": 0.3203125, "learning_rate": 4.97908153244389e-05, "loss": 2.2598, "step": 1903 }, { "epoch": 0.25, "grad_norm": 0.287109375, "learning_rate": 4.979054199495221e-05, "loss": 2.3125, "step": 1904 }, { "epoch": 0.25, "grad_norm": 0.302734375, "learning_rate": 4.9790268487761404e-05, "loss": 2.2867, "step": 1905 }, { "epoch": 0.25, "grad_norm": 0.291015625, "learning_rate": 4.978999480286844e-05, "loss": 2.279, "step": 1906 }, { "epoch": 0.25, "grad_norm": 0.28125, "learning_rate": 4.9789720940275286e-05, "loss": 2.3073, "step": 1907 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.978944689998389e-05, "loss": 2.3142, "step": 1908 }, { "epoch": 0.25, "grad_norm": 0.3046875, "learning_rate": 4.9789172681996246e-05, "loss": 2.3269, "step": 1909 }, { "epoch": 0.25, "grad_norm": 0.32421875, "learning_rate": 4.978889828631429e-05, "loss": 2.2954, "step": 1910 }, { "epoch": 0.25, "grad_norm": 0.30078125, "learning_rate": 4.978862371294001e-05, "loss": 2.3009, "step": 1911 }, { "epoch": 0.26, "grad_norm": 0.296875, "learning_rate": 4.978834896187535e-05, "loss": 2.3221, "step": 1912 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.978807403312231e-05, "loss": 2.3034, "step": 1913 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.9787798926682836e-05, "loss": 2.2873, "step": 1914 }, { "epoch": 0.26, "grad_norm": 0.3125, "learning_rate": 4.9787523642558916e-05, "loss": 2.2953, "step": 1915 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.978724818075251e-05, "loss": 2.2802, "step": 1916 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.97869725412656e-05, "loss": 2.3042, "step": 1917 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.978669672410017e-05, "loss": 2.308, "step": 1918 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.9786420729258176e-05, "loss": 2.3192, "step": 1919 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.978614455674161e-05, "loss": 2.2852, "step": 1920 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.978586820655245e-05, "loss": 2.2935, "step": 1921 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.9785591678692686e-05, "loss": 2.2915, "step": 1922 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.978531497316429e-05, "loss": 2.2736, "step": 1923 }, { "epoch": 0.26, "grad_norm": 0.298828125, "learning_rate": 4.978503808996924e-05, "loss": 2.3252, "step": 1924 }, { "epoch": 0.26, "grad_norm": 0.298828125, "learning_rate": 4.9784761029109525e-05, "loss": 2.3376, "step": 1925 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.9784483790587144e-05, "loss": 2.3025, "step": 1926 }, { "epoch": 0.26, "grad_norm": 0.291015625, "learning_rate": 4.978420637440406e-05, "loss": 2.2839, "step": 1927 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.978392878056228e-05, "loss": 2.3011, "step": 1928 }, { "epoch": 0.26, "grad_norm": 0.2890625, "learning_rate": 4.97836510090638e-05, "loss": 2.3126, "step": 1929 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.9783373059910594e-05, "loss": 2.287, "step": 1930 }, { "epoch": 0.26, "grad_norm": 0.3203125, "learning_rate": 4.978309493310466e-05, "loss": 2.3258, "step": 1931 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.978281662864799e-05, "loss": 2.3026, "step": 1932 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.978253814654258e-05, "loss": 2.2964, "step": 1933 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.978225948679043e-05, "loss": 2.3003, "step": 1934 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.978198064939354e-05, "loss": 2.3049, "step": 1935 }, { "epoch": 0.26, "grad_norm": 0.31640625, "learning_rate": 4.97817016343539e-05, "loss": 2.2753, "step": 1936 }, { "epoch": 0.26, "grad_norm": 0.298828125, "learning_rate": 4.978142244167352e-05, "loss": 2.3024, "step": 1937 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.978114307135439e-05, "loss": 2.301, "step": 1938 }, { "epoch": 0.26, "grad_norm": 0.3046875, "learning_rate": 4.9780863523398506e-05, "loss": 2.2873, "step": 1939 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.97805837978079e-05, "loss": 2.3195, "step": 1940 }, { "epoch": 0.26, "grad_norm": 0.28515625, "learning_rate": 4.9780303894584554e-05, "loss": 2.2965, "step": 1941 }, { "epoch": 0.26, "grad_norm": 0.296875, "learning_rate": 4.978002381373048e-05, "loss": 2.3105, "step": 1942 }, { "epoch": 0.26, "grad_norm": 0.318359375, "learning_rate": 4.977974355524769e-05, "loss": 2.3076, "step": 1943 }, { "epoch": 0.26, "grad_norm": 0.298828125, "learning_rate": 4.977946311913819e-05, "loss": 2.3402, "step": 1944 }, { "epoch": 0.26, "grad_norm": 0.28125, "learning_rate": 4.977918250540399e-05, "loss": 2.3254, "step": 1945 }, { "epoch": 0.26, "grad_norm": 0.310546875, "learning_rate": 4.97789017140471e-05, "loss": 2.3022, "step": 1946 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.977862074506954e-05, "loss": 2.2887, "step": 1947 }, { "epoch": 0.26, "grad_norm": 0.294921875, "learning_rate": 4.977833959847331e-05, "loss": 2.2833, "step": 1948 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.9778058274260434e-05, "loss": 2.2968, "step": 1949 }, { "epoch": 0.26, "grad_norm": 0.3203125, "learning_rate": 4.977777677243294e-05, "loss": 2.2902, "step": 1950 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.977749509299282e-05, "loss": 2.2659, "step": 1951 }, { "epoch": 0.26, "grad_norm": 0.298828125, "learning_rate": 4.977721323594211e-05, "loss": 2.2676, "step": 1952 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.977693120128284e-05, "loss": 2.2937, "step": 1953 }, { "epoch": 0.26, "grad_norm": 0.322265625, "learning_rate": 4.9776648989017006e-05, "loss": 2.3039, "step": 1954 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.977636659914665e-05, "loss": 2.2885, "step": 1955 }, { "epoch": 0.26, "grad_norm": 0.28515625, "learning_rate": 4.977608403167379e-05, "loss": 2.3044, "step": 1956 }, { "epoch": 0.26, "grad_norm": 0.291015625, "learning_rate": 4.977580128660046e-05, "loss": 2.3163, "step": 1957 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.977551836392866e-05, "loss": 2.3177, "step": 1958 }, { "epoch": 0.26, "grad_norm": 0.28515625, "learning_rate": 4.9775235263660445e-05, "loss": 2.3092, "step": 1959 }, { "epoch": 0.26, "grad_norm": 0.3046875, "learning_rate": 4.9774951985797845e-05, "loss": 2.2787, "step": 1960 }, { "epoch": 0.26, "grad_norm": 0.291015625, "learning_rate": 4.977466853034287e-05, "loss": 2.3134, "step": 1961 }, { "epoch": 0.26, "grad_norm": 0.296875, "learning_rate": 4.9774384897297575e-05, "loss": 2.3288, "step": 1962 }, { "epoch": 0.26, "grad_norm": 0.3046875, "learning_rate": 4.977410108666397e-05, "loss": 2.2955, "step": 1963 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.977381709844411e-05, "loss": 2.2715, "step": 1964 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.977353293264002e-05, "loss": 2.3162, "step": 1965 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.977324858925374e-05, "loss": 2.2907, "step": 1966 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.977296406828731e-05, "loss": 2.3243, "step": 1967 }, { "epoch": 0.26, "grad_norm": 0.294921875, "learning_rate": 4.977267936974276e-05, "loss": 2.3463, "step": 1968 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.9772394493622146e-05, "loss": 2.3044, "step": 1969 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.9772109439927495e-05, "loss": 2.29, "step": 1970 }, { "epoch": 0.26, "grad_norm": 0.30859375, "learning_rate": 4.9771824208660857e-05, "loss": 2.2872, "step": 1971 }, { "epoch": 0.26, "grad_norm": 0.287109375, "learning_rate": 4.977153879982428e-05, "loss": 2.2735, "step": 1972 }, { "epoch": 0.26, "grad_norm": 0.30078125, "learning_rate": 4.97712532134198e-05, "loss": 2.2935, "step": 1973 }, { "epoch": 0.26, "grad_norm": 0.28125, "learning_rate": 4.977096744944947e-05, "loss": 2.3281, "step": 1974 }, { "epoch": 0.26, "grad_norm": 0.2890625, "learning_rate": 4.977068150791535e-05, "loss": 2.2798, "step": 1975 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.977039538881947e-05, "loss": 2.3077, "step": 1976 }, { "epoch": 0.26, "grad_norm": 0.31640625, "learning_rate": 4.9770109092163884e-05, "loss": 2.3073, "step": 1977 }, { "epoch": 0.26, "grad_norm": 0.3203125, "learning_rate": 4.976982261795066e-05, "loss": 2.31, "step": 1978 }, { "epoch": 0.26, "grad_norm": 0.314453125, "learning_rate": 4.976953596618184e-05, "loss": 2.291, "step": 1979 }, { "epoch": 0.26, "grad_norm": 0.3125, "learning_rate": 4.976924913685948e-05, "loss": 2.2697, "step": 1980 }, { "epoch": 0.26, "grad_norm": 0.32421875, "learning_rate": 4.976896212998563e-05, "loss": 2.313, "step": 1981 }, { "epoch": 0.26, "grad_norm": 0.28515625, "learning_rate": 4.9768674945562366e-05, "loss": 2.3164, "step": 1982 }, { "epoch": 0.26, "grad_norm": 0.306640625, "learning_rate": 4.976838758359172e-05, "loss": 2.2746, "step": 1983 }, { "epoch": 0.26, "grad_norm": 0.341796875, "learning_rate": 4.9768100044075775e-05, "loss": 2.3106, "step": 1984 }, { "epoch": 0.26, "grad_norm": 0.302734375, "learning_rate": 4.9767812327016586e-05, "loss": 2.3187, "step": 1985 }, { "epoch": 0.26, "grad_norm": 0.2734375, "learning_rate": 4.97675244324162e-05, "loss": 2.2924, "step": 1986 }, { "epoch": 0.27, "grad_norm": 0.3125, "learning_rate": 4.97672363602767e-05, "loss": 2.2805, "step": 1987 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.9766948110600145e-05, "loss": 2.3041, "step": 1988 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.976665968338861e-05, "loss": 2.3121, "step": 1989 }, { "epoch": 0.27, "grad_norm": 0.28515625, "learning_rate": 4.9766371078644135e-05, "loss": 2.3026, "step": 1990 }, { "epoch": 0.27, "grad_norm": 0.31640625, "learning_rate": 4.9766082296368814e-05, "loss": 2.2763, "step": 1991 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.976579333656472e-05, "loss": 2.2988, "step": 1992 }, { "epoch": 0.27, "grad_norm": 0.287109375, "learning_rate": 4.97655041992339e-05, "loss": 2.2773, "step": 1993 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.976521488437844e-05, "loss": 2.3032, "step": 1994 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.976492539200042e-05, "loss": 2.3343, "step": 1995 }, { "epoch": 0.27, "grad_norm": 0.328125, "learning_rate": 4.9764635722101913e-05, "loss": 2.2804, "step": 1996 }, { "epoch": 0.27, "grad_norm": 0.314453125, "learning_rate": 4.976434587468499e-05, "loss": 2.2958, "step": 1997 }, { "epoch": 0.27, "grad_norm": 0.31640625, "learning_rate": 4.976405584975173e-05, "loss": 2.2948, "step": 1998 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.976376564730421e-05, "loss": 2.319, "step": 1999 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.976347526734452e-05, "loss": 2.3384, "step": 2000 }, { "epoch": 0.27, "eval_loss": 2.297675371170044, "eval_runtime": 616.297, "eval_samples_per_second": 62.91, "eval_steps_per_second": 7.865, "step": 2000 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.9763184709874726e-05, "loss": 2.3108, "step": 2001 }, { "epoch": 0.27, "grad_norm": 0.318359375, "learning_rate": 4.9762893974896926e-05, "loss": 2.2987, "step": 2002 }, { "epoch": 0.27, "grad_norm": 1.6328125, "learning_rate": 4.9762603062413194e-05, "loss": 2.3051, "step": 2003 }, { "epoch": 0.27, "grad_norm": 0.326171875, "learning_rate": 4.9762311972425624e-05, "loss": 2.3002, "step": 2004 }, { "epoch": 0.27, "grad_norm": 0.30859375, "learning_rate": 4.976202070493629e-05, "loss": 2.3005, "step": 2005 }, { "epoch": 0.27, "grad_norm": 0.33203125, "learning_rate": 4.976172925994729e-05, "loss": 2.3194, "step": 2006 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.9761437637460715e-05, "loss": 2.3066, "step": 2007 }, { "epoch": 0.27, "grad_norm": 0.31640625, "learning_rate": 4.976114583747864e-05, "loss": 2.2718, "step": 2008 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.9760853860003174e-05, "loss": 2.3012, "step": 2009 }, { "epoch": 0.27, "grad_norm": 0.302734375, "learning_rate": 4.9760561705036404e-05, "loss": 2.3013, "step": 2010 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.9760269372580426e-05, "loss": 2.3127, "step": 2011 }, { "epoch": 0.27, "grad_norm": 0.30078125, "learning_rate": 4.9759976862637326e-05, "loss": 2.3365, "step": 2012 }, { "epoch": 0.27, "grad_norm": 0.31640625, "learning_rate": 4.975968417520921e-05, "loss": 2.3083, "step": 2013 }, { "epoch": 0.27, "grad_norm": 0.298828125, "learning_rate": 4.975939131029818e-05, "loss": 2.2825, "step": 2014 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.975909826790632e-05, "loss": 2.2981, "step": 2015 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.975880504803575e-05, "loss": 2.2906, "step": 2016 }, { "epoch": 0.27, "grad_norm": 0.318359375, "learning_rate": 4.975851165068855e-05, "loss": 2.296, "step": 2017 }, { "epoch": 0.27, "grad_norm": 0.306640625, "learning_rate": 4.975821807586685e-05, "loss": 2.3204, "step": 2018 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.975792432357273e-05, "loss": 2.2894, "step": 2019 }, { "epoch": 0.27, "grad_norm": 0.291015625, "learning_rate": 4.9757630393808306e-05, "loss": 2.317, "step": 2020 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.9757336286575685e-05, "loss": 2.2904, "step": 2021 }, { "epoch": 0.27, "grad_norm": 0.310546875, "learning_rate": 4.975704200187698e-05, "loss": 2.2835, "step": 2022 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.9756747539714276e-05, "loss": 2.3134, "step": 2023 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.975645290008972e-05, "loss": 2.3089, "step": 2024 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.97561580830054e-05, "loss": 2.2945, "step": 2025 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.975586308846344e-05, "loss": 2.2841, "step": 2026 }, { "epoch": 0.27, "grad_norm": 0.2890625, "learning_rate": 4.975556791646594e-05, "loss": 2.2957, "step": 2027 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.9755272567015034e-05, "loss": 2.2955, "step": 2028 }, { "epoch": 0.27, "grad_norm": 0.314453125, "learning_rate": 4.9754977040112835e-05, "loss": 2.2789, "step": 2029 }, { "epoch": 0.27, "grad_norm": 0.28515625, "learning_rate": 4.975468133576145e-05, "loss": 2.3015, "step": 2030 }, { "epoch": 0.27, "grad_norm": 0.287109375, "learning_rate": 4.975438545396301e-05, "loss": 2.3142, "step": 2031 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.9754089394719625e-05, "loss": 2.2942, "step": 2032 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.9753793158033436e-05, "loss": 2.3016, "step": 2033 }, { "epoch": 0.27, "grad_norm": 0.30859375, "learning_rate": 4.975349674390655e-05, "loss": 2.2724, "step": 2034 }, { "epoch": 0.27, "grad_norm": 0.314453125, "learning_rate": 4.9753200152341095e-05, "loss": 2.2905, "step": 2035 }, { "epoch": 0.27, "grad_norm": 0.328125, "learning_rate": 4.9752903383339205e-05, "loss": 2.2876, "step": 2036 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.9752606436903e-05, "loss": 2.3097, "step": 2037 }, { "epoch": 0.27, "grad_norm": 0.30859375, "learning_rate": 4.9752309313034605e-05, "loss": 2.282, "step": 2038 }, { "epoch": 0.27, "grad_norm": 0.28515625, "learning_rate": 4.975201201173616e-05, "loss": 2.298, "step": 2039 }, { "epoch": 0.27, "grad_norm": 0.294921875, "learning_rate": 4.9751714533009785e-05, "loss": 2.2932, "step": 2040 }, { "epoch": 0.27, "grad_norm": 0.30859375, "learning_rate": 4.975141687685762e-05, "loss": 2.3271, "step": 2041 }, { "epoch": 0.27, "grad_norm": 0.3125, "learning_rate": 4.9751119043281795e-05, "loss": 2.291, "step": 2042 }, { "epoch": 0.27, "grad_norm": 0.31640625, "learning_rate": 4.975082103228445e-05, "loss": 2.3047, "step": 2043 }, { "epoch": 0.27, "grad_norm": 0.298828125, "learning_rate": 4.975052284386772e-05, "loss": 2.3546, "step": 2044 }, { "epoch": 0.27, "grad_norm": 0.318359375, "learning_rate": 4.975022447803373e-05, "loss": 2.2677, "step": 2045 }, { "epoch": 0.27, "grad_norm": 0.296875, "learning_rate": 4.9749925934784644e-05, "loss": 2.2749, "step": 2046 }, { "epoch": 0.27, "grad_norm": 0.287109375, "learning_rate": 4.974962721412257e-05, "loss": 2.3206, "step": 2047 }, { "epoch": 0.27, "grad_norm": 0.2890625, "learning_rate": 4.974932831604968e-05, "loss": 2.3187, "step": 2048 }, { "epoch": 0.27, "grad_norm": 0.28515625, "learning_rate": 4.9749029240568104e-05, "loss": 2.267, "step": 2049 }, { "epoch": 0.27, "grad_norm": 0.337890625, "learning_rate": 4.9748729987679976e-05, "loss": 2.2757, "step": 2050 }, { "epoch": 0.27, "grad_norm": 0.28515625, "learning_rate": 4.974843055738746e-05, "loss": 2.3503, "step": 2051 }, { "epoch": 0.27, "grad_norm": 0.3046875, "learning_rate": 4.9748130949692684e-05, "loss": 2.3142, "step": 2052 }, { "epoch": 0.27, "grad_norm": 0.3203125, "learning_rate": 4.974783116459781e-05, "loss": 2.3019, "step": 2053 }, { "epoch": 0.27, "grad_norm": 0.322265625, "learning_rate": 4.974753120210498e-05, "loss": 2.3043, "step": 2054 }, { "epoch": 0.27, "grad_norm": 0.298828125, "learning_rate": 4.974723106221634e-05, "loss": 2.2744, "step": 2055 }, { "epoch": 0.27, "grad_norm": 0.302734375, "learning_rate": 4.974693074493405e-05, "loss": 2.3308, "step": 2056 }, { "epoch": 0.27, "grad_norm": 0.30078125, "learning_rate": 4.974663025026026e-05, "loss": 2.2929, "step": 2057 }, { "epoch": 0.27, "grad_norm": 0.29296875, "learning_rate": 4.974632957819711e-05, "loss": 2.2997, "step": 2058 }, { "epoch": 0.27, "grad_norm": 0.3203125, "learning_rate": 4.974602872874679e-05, "loss": 2.3134, "step": 2059 }, { "epoch": 0.27, "grad_norm": 0.314453125, "learning_rate": 4.9745727701911424e-05, "loss": 2.2752, "step": 2060 }, { "epoch": 0.27, "grad_norm": 0.291015625, "learning_rate": 4.974542649769318e-05, "loss": 2.3043, "step": 2061 }, { "epoch": 0.28, "grad_norm": 0.28515625, "learning_rate": 4.974512511609423e-05, "loss": 2.2922, "step": 2062 }, { "epoch": 0.28, "grad_norm": 0.3203125, "learning_rate": 4.974482355711671e-05, "loss": 2.3084, "step": 2063 }, { "epoch": 0.28, "grad_norm": 0.3046875, "learning_rate": 4.97445218207628e-05, "loss": 2.3068, "step": 2064 }, { "epoch": 0.28, "grad_norm": 0.296875, "learning_rate": 4.974421990703465e-05, "loss": 2.3006, "step": 2065 }, { "epoch": 0.28, "grad_norm": 0.33203125, "learning_rate": 4.974391781593444e-05, "loss": 2.2774, "step": 2066 }, { "epoch": 0.28, "grad_norm": 0.31640625, "learning_rate": 4.974361554746432e-05, "loss": 2.2641, "step": 2067 }, { "epoch": 0.28, "grad_norm": 0.31640625, "learning_rate": 4.974331310162647e-05, "loss": 2.285, "step": 2068 }, { "epoch": 0.28, "grad_norm": 0.310546875, "learning_rate": 4.974301047842305e-05, "loss": 2.319, "step": 2069 }, { "epoch": 0.28, "grad_norm": 0.28515625, "learning_rate": 4.974270767785623e-05, "loss": 2.3203, "step": 2070 }, { "epoch": 0.28, "grad_norm": 0.29296875, "learning_rate": 4.974240469992818e-05, "loss": 2.2831, "step": 2071 }, { "epoch": 0.28, "grad_norm": 0.330078125, "learning_rate": 4.9742101544641076e-05, "loss": 2.3187, "step": 2072 }, { "epoch": 0.28, "grad_norm": 0.337890625, "learning_rate": 4.974179821199708e-05, "loss": 2.2811, "step": 2073 }, { "epoch": 0.28, "grad_norm": 0.302734375, "learning_rate": 4.9741494701998384e-05, "loss": 2.2952, "step": 2074 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.974119101464715e-05, "loss": 2.2787, "step": 2075 }, { "epoch": 0.28, "grad_norm": 0.310546875, "learning_rate": 4.974088714994556e-05, "loss": 2.2828, "step": 2076 }, { "epoch": 0.28, "grad_norm": 0.294921875, "learning_rate": 4.974058310789579e-05, "loss": 2.3076, "step": 2077 }, { "epoch": 0.28, "grad_norm": 0.287109375, "learning_rate": 4.9740278888500016e-05, "loss": 2.2885, "step": 2078 }, { "epoch": 0.28, "grad_norm": 0.318359375, "learning_rate": 4.973997449176043e-05, "loss": 2.3256, "step": 2079 }, { "epoch": 0.28, "grad_norm": 0.318359375, "learning_rate": 4.9739669917679204e-05, "loss": 2.2852, "step": 2080 }, { "epoch": 0.28, "grad_norm": 0.291015625, "learning_rate": 4.9739365166258525e-05, "loss": 2.3022, "step": 2081 }, { "epoch": 0.28, "grad_norm": 0.294921875, "learning_rate": 4.973906023750058e-05, "loss": 2.3192, "step": 2082 }, { "epoch": 0.28, "grad_norm": 0.322265625, "learning_rate": 4.973875513140755e-05, "loss": 2.3083, "step": 2083 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.973844984798163e-05, "loss": 2.3068, "step": 2084 }, { "epoch": 0.28, "grad_norm": 0.30078125, "learning_rate": 4.973814438722499e-05, "loss": 2.2941, "step": 2085 }, { "epoch": 0.28, "grad_norm": 0.296875, "learning_rate": 4.973783874913984e-05, "loss": 2.3401, "step": 2086 }, { "epoch": 0.28, "grad_norm": 0.283203125, "learning_rate": 4.973753293372836e-05, "loss": 2.2912, "step": 2087 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.973722694099275e-05, "loss": 2.3115, "step": 2088 }, { "epoch": 0.28, "grad_norm": 0.314453125, "learning_rate": 4.973692077093519e-05, "loss": 2.3116, "step": 2089 }, { "epoch": 0.28, "grad_norm": 0.294921875, "learning_rate": 4.973661442355789e-05, "loss": 2.2838, "step": 2090 }, { "epoch": 0.28, "grad_norm": 0.291015625, "learning_rate": 4.9736307898863035e-05, "loss": 2.328, "step": 2091 }, { "epoch": 0.28, "grad_norm": 0.302734375, "learning_rate": 4.973600119685283e-05, "loss": 2.3003, "step": 2092 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.9735694317529466e-05, "loss": 2.2969, "step": 2093 }, { "epoch": 0.28, "grad_norm": 0.28515625, "learning_rate": 4.973538726089515e-05, "loss": 2.2754, "step": 2094 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.973508002695208e-05, "loss": 2.2934, "step": 2095 }, { "epoch": 0.28, "grad_norm": 0.302734375, "learning_rate": 4.9734772615702456e-05, "loss": 2.2913, "step": 2096 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.973446502714848e-05, "loss": 2.2973, "step": 2097 }, { "epoch": 0.28, "grad_norm": 0.29296875, "learning_rate": 4.973415726129237e-05, "loss": 2.2791, "step": 2098 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.9733849318136314e-05, "loss": 2.276, "step": 2099 }, { "epoch": 0.28, "grad_norm": 0.298828125, "learning_rate": 4.9733541197682534e-05, "loss": 2.2925, "step": 2100 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.9733232899933225e-05, "loss": 2.2985, "step": 2101 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.973292442489062e-05, "loss": 2.3054, "step": 2102 }, { "epoch": 0.28, "grad_norm": 0.298828125, "learning_rate": 4.97326157725569e-05, "loss": 2.2867, "step": 2103 }, { "epoch": 0.28, "grad_norm": 0.3046875, "learning_rate": 4.973230694293429e-05, "loss": 2.294, "step": 2104 }, { "epoch": 0.28, "grad_norm": 0.30078125, "learning_rate": 4.973199793602501e-05, "loss": 2.2892, "step": 2105 }, { "epoch": 0.28, "grad_norm": 0.3046875, "learning_rate": 4.973168875183127e-05, "loss": 2.2982, "step": 2106 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.9731379390355295e-05, "loss": 2.2583, "step": 2107 }, { "epoch": 0.28, "grad_norm": 0.291015625, "learning_rate": 4.973106985159929e-05, "loss": 2.3033, "step": 2108 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.973076013556548e-05, "loss": 2.2898, "step": 2109 }, { "epoch": 0.28, "grad_norm": 0.2890625, "learning_rate": 4.973045024225609e-05, "loss": 2.2878, "step": 2110 }, { "epoch": 0.28, "grad_norm": 0.322265625, "learning_rate": 4.973014017167332e-05, "loss": 2.3095, "step": 2111 }, { "epoch": 0.28, "grad_norm": 0.322265625, "learning_rate": 4.9729829923819424e-05, "loss": 2.3137, "step": 2112 }, { "epoch": 0.28, "grad_norm": 0.298828125, "learning_rate": 4.9729519498696595e-05, "loss": 2.2833, "step": 2113 }, { "epoch": 0.28, "grad_norm": 0.283203125, "learning_rate": 4.972920889630708e-05, "loss": 2.3147, "step": 2114 }, { "epoch": 0.28, "grad_norm": 0.30078125, "learning_rate": 4.97288981166531e-05, "loss": 2.2852, "step": 2115 }, { "epoch": 0.28, "grad_norm": 0.30078125, "learning_rate": 4.972858715973688e-05, "loss": 2.276, "step": 2116 }, { "epoch": 0.28, "grad_norm": 0.298828125, "learning_rate": 4.9728276025560646e-05, "loss": 2.2935, "step": 2117 }, { "epoch": 0.28, "grad_norm": 0.296875, "learning_rate": 4.972796471412664e-05, "loss": 2.2872, "step": 2118 }, { "epoch": 0.28, "grad_norm": 0.3125, "learning_rate": 4.972765322543709e-05, "loss": 2.3102, "step": 2119 }, { "epoch": 0.28, "grad_norm": 0.296875, "learning_rate": 4.972734155949421e-05, "loss": 2.3071, "step": 2120 }, { "epoch": 0.28, "grad_norm": 0.3046875, "learning_rate": 4.972702971630027e-05, "loss": 2.2979, "step": 2121 }, { "epoch": 0.28, "grad_norm": 0.302734375, "learning_rate": 4.972671769585746e-05, "loss": 2.2946, "step": 2122 }, { "epoch": 0.28, "grad_norm": 0.294921875, "learning_rate": 4.972640549816806e-05, "loss": 2.3269, "step": 2123 }, { "epoch": 0.28, "grad_norm": 0.283203125, "learning_rate": 4.9726093123234274e-05, "loss": 2.3061, "step": 2124 }, { "epoch": 0.28, "grad_norm": 0.287109375, "learning_rate": 4.972578057105837e-05, "loss": 2.3157, "step": 2125 }, { "epoch": 0.28, "grad_norm": 0.3046875, "learning_rate": 4.9725467841642565e-05, "loss": 2.2863, "step": 2126 }, { "epoch": 0.28, "grad_norm": 0.279296875, "learning_rate": 4.9725154934989117e-05, "loss": 2.3074, "step": 2127 }, { "epoch": 0.28, "grad_norm": 0.291015625, "learning_rate": 4.972484185110026e-05, "loss": 2.2819, "step": 2128 }, { "epoch": 0.28, "grad_norm": 0.306640625, "learning_rate": 4.972452858997824e-05, "loss": 2.2994, "step": 2129 }, { "epoch": 0.28, "grad_norm": 0.310546875, "learning_rate": 4.9724215151625296e-05, "loss": 2.2916, "step": 2130 }, { "epoch": 0.28, "grad_norm": 0.298828125, "learning_rate": 4.9723901536043685e-05, "loss": 2.3157, "step": 2131 }, { "epoch": 0.28, "grad_norm": 0.3125, "learning_rate": 4.972358774323566e-05, "loss": 2.2903, "step": 2132 }, { "epoch": 0.28, "grad_norm": 0.318359375, "learning_rate": 4.972327377320345e-05, "loss": 2.2856, "step": 2133 }, { "epoch": 0.28, "grad_norm": 0.287109375, "learning_rate": 4.972295962594933e-05, "loss": 2.2734, "step": 2134 }, { "epoch": 0.28, "grad_norm": 0.287109375, "learning_rate": 4.972264530147553e-05, "loss": 2.3022, "step": 2135 }, { "epoch": 0.28, "grad_norm": 0.30859375, "learning_rate": 4.972233079978432e-05, "loss": 2.2866, "step": 2136 }, { "epoch": 0.29, "grad_norm": 0.30859375, "learning_rate": 4.9722016120877944e-05, "loss": 2.3075, "step": 2137 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.9721701264758665e-05, "loss": 2.3044, "step": 2138 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.972138623142873e-05, "loss": 2.2883, "step": 2139 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.972107102089041e-05, "loss": 2.3147, "step": 2140 }, { "epoch": 0.29, "grad_norm": 0.294921875, "learning_rate": 4.972075563314595e-05, "loss": 2.2852, "step": 2141 }, { "epoch": 0.29, "grad_norm": 0.31640625, "learning_rate": 4.972044006819761e-05, "loss": 2.3029, "step": 2142 }, { "epoch": 0.29, "grad_norm": 0.314453125, "learning_rate": 4.972012432604768e-05, "loss": 2.2834, "step": 2143 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.97198084066984e-05, "loss": 2.3047, "step": 2144 }, { "epoch": 0.29, "grad_norm": 0.28515625, "learning_rate": 4.9719492310152025e-05, "loss": 2.2755, "step": 2145 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.9719176036410854e-05, "loss": 2.2872, "step": 2146 }, { "epoch": 0.29, "grad_norm": 0.279296875, "learning_rate": 4.971885958547712e-05, "loss": 2.241, "step": 2147 }, { "epoch": 0.29, "grad_norm": 0.29296875, "learning_rate": 4.971854295735311e-05, "loss": 2.2882, "step": 2148 }, { "epoch": 0.29, "grad_norm": 0.298828125, "learning_rate": 4.9718226152041085e-05, "loss": 2.3186, "step": 2149 }, { "epoch": 0.29, "grad_norm": 0.3125, "learning_rate": 4.9717909169543326e-05, "loss": 2.3097, "step": 2150 }, { "epoch": 0.29, "grad_norm": 0.322265625, "learning_rate": 4.97175920098621e-05, "loss": 2.2661, "step": 2151 }, { "epoch": 0.29, "grad_norm": 0.283203125, "learning_rate": 4.971727467299967e-05, "loss": 2.3153, "step": 2152 }, { "epoch": 0.29, "grad_norm": 0.28515625, "learning_rate": 4.971695715895833e-05, "loss": 2.2878, "step": 2153 }, { "epoch": 0.29, "grad_norm": 0.279296875, "learning_rate": 4.9716639467740346e-05, "loss": 2.3389, "step": 2154 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.971632159934799e-05, "loss": 2.2711, "step": 2155 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.971600355378355e-05, "loss": 2.3286, "step": 2156 }, { "epoch": 0.29, "grad_norm": 0.28125, "learning_rate": 4.971568533104931e-05, "loss": 2.3134, "step": 2157 }, { "epoch": 0.29, "grad_norm": 0.3046875, "learning_rate": 4.9715366931147536e-05, "loss": 2.2983, "step": 2158 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.971504835408052e-05, "loss": 2.301, "step": 2159 }, { "epoch": 0.29, "grad_norm": 0.298828125, "learning_rate": 4.971472959985054e-05, "loss": 2.2994, "step": 2160 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.971441066845989e-05, "loss": 2.3176, "step": 2161 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.971409155991085e-05, "loss": 2.3014, "step": 2162 }, { "epoch": 0.29, "grad_norm": 0.27734375, "learning_rate": 4.97137722742057e-05, "loss": 2.2984, "step": 2163 }, { "epoch": 0.29, "grad_norm": 0.28515625, "learning_rate": 4.971345281134674e-05, "loss": 2.3011, "step": 2164 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.971313317133626e-05, "loss": 2.2968, "step": 2165 }, { "epoch": 0.29, "grad_norm": 0.3046875, "learning_rate": 4.9712813354176546e-05, "loss": 2.2784, "step": 2166 }, { "epoch": 0.29, "grad_norm": 0.310546875, "learning_rate": 4.971249335986989e-05, "loss": 2.3284, "step": 2167 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.97121731884186e-05, "loss": 2.2638, "step": 2168 }, { "epoch": 0.29, "grad_norm": 0.296875, "learning_rate": 4.9711852839824944e-05, "loss": 2.2874, "step": 2169 }, { "epoch": 0.29, "grad_norm": 0.27734375, "learning_rate": 4.971153231409124e-05, "loss": 2.3264, "step": 2170 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.971121161121978e-05, "loss": 2.3195, "step": 2171 }, { "epoch": 0.29, "grad_norm": 0.31640625, "learning_rate": 4.971089073121286e-05, "loss": 2.3279, "step": 2172 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.9710569674072783e-05, "loss": 2.306, "step": 2173 }, { "epoch": 0.29, "grad_norm": 0.29296875, "learning_rate": 4.9710248439801856e-05, "loss": 2.3204, "step": 2174 }, { "epoch": 0.29, "grad_norm": 0.3046875, "learning_rate": 4.970992702840237e-05, "loss": 2.2961, "step": 2175 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.9709605439876627e-05, "loss": 2.2781, "step": 2176 }, { "epoch": 0.29, "grad_norm": 0.283203125, "learning_rate": 4.9709283674226946e-05, "loss": 2.3092, "step": 2177 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.970896173145563e-05, "loss": 2.3094, "step": 2178 }, { "epoch": 0.29, "grad_norm": 0.326171875, "learning_rate": 4.970863961156498e-05, "loss": 2.2916, "step": 2179 }, { "epoch": 0.29, "grad_norm": 0.296875, "learning_rate": 4.9708317314557306e-05, "loss": 2.2582, "step": 2180 }, { "epoch": 0.29, "grad_norm": 0.310546875, "learning_rate": 4.970799484043492e-05, "loss": 2.28, "step": 2181 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.9707672189200145e-05, "loss": 2.3152, "step": 2182 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.970734936085528e-05, "loss": 2.3119, "step": 2183 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.970702635540264e-05, "loss": 2.3251, "step": 2184 }, { "epoch": 0.29, "grad_norm": 0.306640625, "learning_rate": 4.970670317284454e-05, "loss": 2.2879, "step": 2185 }, { "epoch": 0.29, "grad_norm": 0.27734375, "learning_rate": 4.97063798131833e-05, "loss": 2.2815, "step": 2186 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.970605627642124e-05, "loss": 2.3085, "step": 2187 }, { "epoch": 0.29, "grad_norm": 0.28125, "learning_rate": 4.970573256256068e-05, "loss": 2.3171, "step": 2188 }, { "epoch": 0.29, "grad_norm": 0.298828125, "learning_rate": 4.9705408671603935e-05, "loss": 2.3131, "step": 2189 }, { "epoch": 0.29, "grad_norm": 0.2734375, "learning_rate": 4.970508460355333e-05, "loss": 2.2921, "step": 2190 }, { "epoch": 0.29, "grad_norm": 0.28125, "learning_rate": 4.970476035841118e-05, "loss": 2.3125, "step": 2191 }, { "epoch": 0.29, "grad_norm": 0.302734375, "learning_rate": 4.970443593617982e-05, "loss": 2.2663, "step": 2192 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.970411133686157e-05, "loss": 2.3229, "step": 2193 }, { "epoch": 0.29, "grad_norm": 0.294921875, "learning_rate": 4.970378656045876e-05, "loss": 2.3203, "step": 2194 }, { "epoch": 0.29, "grad_norm": 0.310546875, "learning_rate": 4.970346160697372e-05, "loss": 2.2946, "step": 2195 }, { "epoch": 0.29, "grad_norm": 0.314453125, "learning_rate": 4.9703136476408776e-05, "loss": 2.3087, "step": 2196 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.970281116876625e-05, "loss": 2.3123, "step": 2197 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.9702485684048487e-05, "loss": 2.3112, "step": 2198 }, { "epoch": 0.29, "grad_norm": 0.296875, "learning_rate": 4.9702160022257814e-05, "loss": 2.2959, "step": 2199 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.970183418339657e-05, "loss": 2.3198, "step": 2200 }, { "epoch": 0.29, "grad_norm": 0.3203125, "learning_rate": 4.9701508167467094e-05, "loss": 2.2917, "step": 2201 }, { "epoch": 0.29, "grad_norm": 0.287109375, "learning_rate": 4.97011819744717e-05, "loss": 2.2734, "step": 2202 }, { "epoch": 0.29, "grad_norm": 0.2890625, "learning_rate": 4.9700855604412754e-05, "loss": 2.2957, "step": 2203 }, { "epoch": 0.29, "grad_norm": 0.29296875, "learning_rate": 4.9700529057292586e-05, "loss": 2.2985, "step": 2204 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.970020233311353e-05, "loss": 2.3192, "step": 2205 }, { "epoch": 0.29, "grad_norm": 0.28515625, "learning_rate": 4.969987543187793e-05, "loss": 2.2578, "step": 2206 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.9699548353588136e-05, "loss": 2.2879, "step": 2207 }, { "epoch": 0.29, "grad_norm": 0.30078125, "learning_rate": 4.9699221098246495e-05, "loss": 2.3181, "step": 2208 }, { "epoch": 0.29, "grad_norm": 0.291015625, "learning_rate": 4.969889366585534e-05, "loss": 2.2974, "step": 2209 }, { "epoch": 0.29, "grad_norm": 0.294921875, "learning_rate": 4.969856605641702e-05, "loss": 2.2936, "step": 2210 }, { "epoch": 0.29, "grad_norm": 0.2734375, "learning_rate": 4.969823826993389e-05, "loss": 2.286, "step": 2211 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.969791030640831e-05, "loss": 2.299, "step": 2212 }, { "epoch": 0.3, "grad_norm": 0.294921875, "learning_rate": 4.969758216584261e-05, "loss": 2.3056, "step": 2213 }, { "epoch": 0.3, "grad_norm": 0.31640625, "learning_rate": 4.969725384823916e-05, "loss": 2.3064, "step": 2214 }, { "epoch": 0.3, "grad_norm": 0.294921875, "learning_rate": 4.969692535360029e-05, "loss": 2.3355, "step": 2215 }, { "epoch": 0.3, "grad_norm": 0.287109375, "learning_rate": 4.9696596681928374e-05, "loss": 2.2908, "step": 2216 }, { "epoch": 0.3, "grad_norm": 0.326171875, "learning_rate": 4.9696267833225765e-05, "loss": 2.2702, "step": 2217 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9695938807494815e-05, "loss": 2.2833, "step": 2218 }, { "epoch": 0.3, "grad_norm": 0.2890625, "learning_rate": 4.969560960473789e-05, "loss": 2.2749, "step": 2219 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.969528022495734e-05, "loss": 2.3315, "step": 2220 }, { "epoch": 0.3, "grad_norm": 0.345703125, "learning_rate": 4.9694950668155534e-05, "loss": 2.2948, "step": 2221 }, { "epoch": 0.3, "grad_norm": 0.3125, "learning_rate": 4.969462093433484e-05, "loss": 2.3023, "step": 2222 }, { "epoch": 0.3, "grad_norm": 0.314453125, "learning_rate": 4.969429102349761e-05, "loss": 2.2703, "step": 2223 }, { "epoch": 0.3, "grad_norm": 0.298828125, "learning_rate": 4.969396093564621e-05, "loss": 2.2694, "step": 2224 }, { "epoch": 0.3, "grad_norm": 0.326171875, "learning_rate": 4.9693630670783006e-05, "loss": 2.328, "step": 2225 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.969330022891037e-05, "loss": 2.2703, "step": 2226 }, { "epoch": 0.3, "grad_norm": 0.302734375, "learning_rate": 4.969296961003067e-05, "loss": 2.304, "step": 2227 }, { "epoch": 0.3, "grad_norm": 0.30078125, "learning_rate": 4.969263881414627e-05, "loss": 2.2811, "step": 2228 }, { "epoch": 0.3, "grad_norm": 0.31640625, "learning_rate": 4.9692307841259555e-05, "loss": 2.3089, "step": 2229 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.969197669137289e-05, "loss": 2.3119, "step": 2230 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.969164536448864e-05, "loss": 2.3103, "step": 2231 }, { "epoch": 0.3, "grad_norm": 0.30859375, "learning_rate": 4.969131386060919e-05, "loss": 2.32, "step": 2232 }, { "epoch": 0.3, "grad_norm": 0.306640625, "learning_rate": 4.969098217973691e-05, "loss": 2.2704, "step": 2233 }, { "epoch": 0.3, "grad_norm": 0.296875, "learning_rate": 4.9690650321874185e-05, "loss": 2.285, "step": 2234 }, { "epoch": 0.3, "grad_norm": 0.302734375, "learning_rate": 4.969031828702339e-05, "loss": 2.2926, "step": 2235 }, { "epoch": 0.3, "grad_norm": 0.296875, "learning_rate": 4.96899860751869e-05, "loss": 2.3058, "step": 2236 }, { "epoch": 0.3, "grad_norm": 0.318359375, "learning_rate": 4.968965368636711e-05, "loss": 2.2877, "step": 2237 }, { "epoch": 0.3, "grad_norm": 0.298828125, "learning_rate": 4.968932112056639e-05, "loss": 2.2766, "step": 2238 }, { "epoch": 0.3, "grad_norm": 0.30078125, "learning_rate": 4.968898837778713e-05, "loss": 2.289, "step": 2239 }, { "epoch": 0.3, "grad_norm": 0.30078125, "learning_rate": 4.9688655458031705e-05, "loss": 2.3102, "step": 2240 }, { "epoch": 0.3, "grad_norm": 0.314453125, "learning_rate": 4.968832236130252e-05, "loss": 2.3456, "step": 2241 }, { "epoch": 0.3, "grad_norm": 0.310546875, "learning_rate": 4.968798908760194e-05, "loss": 2.2909, "step": 2242 }, { "epoch": 0.3, "grad_norm": 0.306640625, "learning_rate": 4.968765563693238e-05, "loss": 2.3083, "step": 2243 }, { "epoch": 0.3, "grad_norm": 0.28515625, "learning_rate": 4.968732200929621e-05, "loss": 2.3035, "step": 2244 }, { "epoch": 0.3, "grad_norm": 0.28125, "learning_rate": 4.9686988204695826e-05, "loss": 2.2981, "step": 2245 }, { "epoch": 0.3, "grad_norm": 0.30859375, "learning_rate": 4.968665422313362e-05, "loss": 2.2813, "step": 2246 }, { "epoch": 0.3, "grad_norm": 0.294921875, "learning_rate": 4.9686320064612e-05, "loss": 2.2979, "step": 2247 }, { "epoch": 0.3, "grad_norm": 0.302734375, "learning_rate": 4.9685985729133344e-05, "loss": 2.2729, "step": 2248 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.9685651216700054e-05, "loss": 2.304, "step": 2249 }, { "epoch": 0.3, "grad_norm": 0.27734375, "learning_rate": 4.968531652731453e-05, "loss": 2.3035, "step": 2250 }, { "epoch": 0.3, "grad_norm": 0.287109375, "learning_rate": 4.968498166097917e-05, "loss": 2.2962, "step": 2251 }, { "epoch": 0.3, "grad_norm": 0.283203125, "learning_rate": 4.968464661769637e-05, "loss": 2.3018, "step": 2252 }, { "epoch": 0.3, "grad_norm": 0.294921875, "learning_rate": 4.968431139746854e-05, "loss": 2.2992, "step": 2253 }, { "epoch": 0.3, "grad_norm": 0.296875, "learning_rate": 4.968397600029808e-05, "loss": 2.2883, "step": 2254 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9683640426187395e-05, "loss": 2.2653, "step": 2255 }, { "epoch": 0.3, "grad_norm": 0.2890625, "learning_rate": 4.968330467513889e-05, "loss": 2.3043, "step": 2256 }, { "epoch": 0.3, "grad_norm": 0.2734375, "learning_rate": 4.968296874715495e-05, "loss": 2.2896, "step": 2257 }, { "epoch": 0.3, "grad_norm": 0.27734375, "learning_rate": 4.9682632642238024e-05, "loss": 2.2756, "step": 2258 }, { "epoch": 0.3, "grad_norm": 0.3125, "learning_rate": 4.9682296360390496e-05, "loss": 2.2812, "step": 2259 }, { "epoch": 0.3, "grad_norm": 0.283203125, "learning_rate": 4.9681959901614786e-05, "loss": 2.2964, "step": 2260 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9681623265913293e-05, "loss": 2.2933, "step": 2261 }, { "epoch": 0.3, "grad_norm": 0.271484375, "learning_rate": 4.968128645328843e-05, "loss": 2.2864, "step": 2262 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.968094946374263e-05, "loss": 2.3292, "step": 2263 }, { "epoch": 0.3, "grad_norm": 0.318359375, "learning_rate": 4.96806122972783e-05, "loss": 2.2636, "step": 2264 }, { "epoch": 0.3, "grad_norm": 0.2890625, "learning_rate": 4.968027495389785e-05, "loss": 2.2675, "step": 2265 }, { "epoch": 0.3, "grad_norm": 0.279296875, "learning_rate": 4.967993743360371e-05, "loss": 2.289, "step": 2266 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.967959973639828e-05, "loss": 2.293, "step": 2267 }, { "epoch": 0.3, "grad_norm": 0.26953125, "learning_rate": 4.9679261862283996e-05, "loss": 2.289, "step": 2268 }, { "epoch": 0.3, "grad_norm": 0.30078125, "learning_rate": 4.967892381126329e-05, "loss": 2.3036, "step": 2269 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9678585583338555e-05, "loss": 2.2861, "step": 2270 }, { "epoch": 0.3, "grad_norm": 0.2890625, "learning_rate": 4.9678247178512245e-05, "loss": 2.2789, "step": 2271 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.967790859678677e-05, "loss": 2.3199, "step": 2272 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9677569838164555e-05, "loss": 2.2981, "step": 2273 }, { "epoch": 0.3, "grad_norm": 0.296875, "learning_rate": 4.967723090264804e-05, "loss": 2.2715, "step": 2274 }, { "epoch": 0.3, "grad_norm": 0.30859375, "learning_rate": 4.9676891790239646e-05, "loss": 2.281, "step": 2275 }, { "epoch": 0.3, "grad_norm": 0.287109375, "learning_rate": 4.967655250094181e-05, "loss": 2.3054, "step": 2276 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.967621303475696e-05, "loss": 2.2766, "step": 2277 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.967587339168752e-05, "loss": 2.3213, "step": 2278 }, { "epoch": 0.3, "grad_norm": 0.30078125, "learning_rate": 4.967553357173594e-05, "loss": 2.2968, "step": 2279 }, { "epoch": 0.3, "grad_norm": 0.31640625, "learning_rate": 4.9675193574904656e-05, "loss": 2.299, "step": 2280 }, { "epoch": 0.3, "grad_norm": 0.3046875, "learning_rate": 4.9674853401196084e-05, "loss": 2.2994, "step": 2281 }, { "epoch": 0.3, "grad_norm": 0.298828125, "learning_rate": 4.9674513050612694e-05, "loss": 2.2732, "step": 2282 }, { "epoch": 0.3, "grad_norm": 0.2890625, "learning_rate": 4.9674172523156906e-05, "loss": 2.3031, "step": 2283 }, { "epoch": 0.3, "grad_norm": 0.29296875, "learning_rate": 4.9673831818831165e-05, "loss": 2.3234, "step": 2284 }, { "epoch": 0.3, "grad_norm": 0.302734375, "learning_rate": 4.9673490937637904e-05, "loss": 2.2647, "step": 2285 }, { "epoch": 0.3, "grad_norm": 0.291015625, "learning_rate": 4.967314987957958e-05, "loss": 2.3076, "step": 2286 }, { "epoch": 0.31, "grad_norm": 0.3125, "learning_rate": 4.9672808644658634e-05, "loss": 2.2894, "step": 2287 }, { "epoch": 0.31, "grad_norm": 0.31640625, "learning_rate": 4.967246723287752e-05, "loss": 2.2814, "step": 2288 }, { "epoch": 0.31, "grad_norm": 0.306640625, "learning_rate": 4.967212564423867e-05, "loss": 2.3105, "step": 2289 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.967178387874453e-05, "loss": 2.299, "step": 2290 }, { "epoch": 0.31, "grad_norm": 0.287109375, "learning_rate": 4.9671441936397565e-05, "loss": 2.2738, "step": 2291 }, { "epoch": 0.31, "grad_norm": 0.296875, "learning_rate": 4.967109981720022e-05, "loss": 2.3172, "step": 2292 }, { "epoch": 0.31, "grad_norm": 0.306640625, "learning_rate": 4.9670757521154945e-05, "loss": 2.3173, "step": 2293 }, { "epoch": 0.31, "grad_norm": 0.318359375, "learning_rate": 4.96704150482642e-05, "loss": 2.2837, "step": 2294 }, { "epoch": 0.31, "grad_norm": 0.302734375, "learning_rate": 4.967007239853043e-05, "loss": 2.3029, "step": 2295 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.96697295719561e-05, "loss": 2.3037, "step": 2296 }, { "epoch": 0.31, "grad_norm": 0.322265625, "learning_rate": 4.966938656854368e-05, "loss": 2.2774, "step": 2297 }, { "epoch": 0.31, "grad_norm": 0.279296875, "learning_rate": 4.966904338829559e-05, "loss": 2.2971, "step": 2298 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.9668700031214324e-05, "loss": 2.3111, "step": 2299 }, { "epoch": 0.31, "grad_norm": 0.3046875, "learning_rate": 4.9668356497302334e-05, "loss": 2.3048, "step": 2300 }, { "epoch": 0.31, "grad_norm": 0.294921875, "learning_rate": 4.966801278656208e-05, "loss": 2.3045, "step": 2301 }, { "epoch": 0.31, "grad_norm": 0.31640625, "learning_rate": 4.9667668898996024e-05, "loss": 2.2892, "step": 2302 }, { "epoch": 0.31, "grad_norm": 0.3046875, "learning_rate": 4.966732483460663e-05, "loss": 2.2944, "step": 2303 }, { "epoch": 0.31, "grad_norm": 0.27734375, "learning_rate": 4.9666980593396374e-05, "loss": 2.3122, "step": 2304 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.966663617536772e-05, "loss": 2.315, "step": 2305 }, { "epoch": 0.31, "grad_norm": 0.32421875, "learning_rate": 4.9666291580523136e-05, "loss": 2.289, "step": 2306 }, { "epoch": 0.31, "grad_norm": 0.318359375, "learning_rate": 4.966594680886508e-05, "loss": 2.2678, "step": 2307 }, { "epoch": 0.31, "grad_norm": 0.28515625, "learning_rate": 4.966560186039604e-05, "loss": 2.2654, "step": 2308 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.966525673511849e-05, "loss": 2.2928, "step": 2309 }, { "epoch": 0.31, "grad_norm": 0.28125, "learning_rate": 4.966491143303489e-05, "loss": 2.3423, "step": 2310 }, { "epoch": 0.31, "grad_norm": 0.302734375, "learning_rate": 4.966456595414772e-05, "loss": 2.2888, "step": 2311 }, { "epoch": 0.31, "grad_norm": 0.314453125, "learning_rate": 4.9664220298459465e-05, "loss": 2.2891, "step": 2312 }, { "epoch": 0.31, "grad_norm": 0.3203125, "learning_rate": 4.966387446597259e-05, "loss": 2.2716, "step": 2313 }, { "epoch": 0.31, "grad_norm": 0.322265625, "learning_rate": 4.9663528456689584e-05, "loss": 2.3061, "step": 2314 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.966318227061292e-05, "loss": 2.2651, "step": 2315 }, { "epoch": 0.31, "grad_norm": 0.333984375, "learning_rate": 4.966283590774509e-05, "loss": 2.2712, "step": 2316 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.966248936808856e-05, "loss": 2.314, "step": 2317 }, { "epoch": 0.31, "grad_norm": 0.294921875, "learning_rate": 4.966214265164583e-05, "loss": 2.2859, "step": 2318 }, { "epoch": 0.31, "grad_norm": 0.287109375, "learning_rate": 4.966179575841938e-05, "loss": 2.3062, "step": 2319 }, { "epoch": 0.31, "grad_norm": 0.318359375, "learning_rate": 4.966144868841169e-05, "loss": 2.3156, "step": 2320 }, { "epoch": 0.31, "grad_norm": 0.31640625, "learning_rate": 4.9661101441625266e-05, "loss": 2.259, "step": 2321 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.966075401806257e-05, "loss": 2.293, "step": 2322 }, { "epoch": 0.31, "grad_norm": 0.3359375, "learning_rate": 4.9660406417726115e-05, "loss": 2.2644, "step": 2323 }, { "epoch": 0.31, "grad_norm": 0.296875, "learning_rate": 4.966005864061839e-05, "loss": 2.3015, "step": 2324 }, { "epoch": 0.31, "grad_norm": 0.32421875, "learning_rate": 4.965971068674187e-05, "loss": 2.2968, "step": 2325 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.9659362556099075e-05, "loss": 2.2926, "step": 2326 }, { "epoch": 0.31, "grad_norm": 0.302734375, "learning_rate": 4.9659014248692483e-05, "loss": 2.3171, "step": 2327 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.965866576452459e-05, "loss": 2.2987, "step": 2328 }, { "epoch": 0.31, "grad_norm": 0.326171875, "learning_rate": 4.965831710359791e-05, "loss": 2.2809, "step": 2329 }, { "epoch": 0.31, "grad_norm": 0.3046875, "learning_rate": 4.9657968265914925e-05, "loss": 2.2744, "step": 2330 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.9657619251478135e-05, "loss": 2.2925, "step": 2331 }, { "epoch": 0.31, "grad_norm": 0.28125, "learning_rate": 4.9657270060290065e-05, "loss": 2.2797, "step": 2332 }, { "epoch": 0.31, "grad_norm": 0.314453125, "learning_rate": 4.9656920692353195e-05, "loss": 2.3077, "step": 2333 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.9656571147670025e-05, "loss": 2.3098, "step": 2334 }, { "epoch": 0.31, "grad_norm": 0.314453125, "learning_rate": 4.965622142624309e-05, "loss": 2.2723, "step": 2335 }, { "epoch": 0.31, "grad_norm": 0.28125, "learning_rate": 4.965587152807487e-05, "loss": 2.254, "step": 2336 }, { "epoch": 0.31, "grad_norm": 0.3125, "learning_rate": 4.965552145316788e-05, "loss": 2.2877, "step": 2337 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.9655171201524635e-05, "loss": 2.2603, "step": 2338 }, { "epoch": 0.31, "grad_norm": 0.310546875, "learning_rate": 4.9654820773147645e-05, "loss": 2.2627, "step": 2339 }, { "epoch": 0.31, "grad_norm": 0.337890625, "learning_rate": 4.965447016803941e-05, "loss": 2.3008, "step": 2340 }, { "epoch": 0.31, "grad_norm": 0.291015625, "learning_rate": 4.9654119386202465e-05, "loss": 2.3317, "step": 2341 }, { "epoch": 0.31, "grad_norm": 0.322265625, "learning_rate": 4.96537684276393e-05, "loss": 2.3073, "step": 2342 }, { "epoch": 0.31, "grad_norm": 0.31640625, "learning_rate": 4.9653417292352454e-05, "loss": 2.2891, "step": 2343 }, { "epoch": 0.31, "grad_norm": 0.283203125, "learning_rate": 4.9653065980344425e-05, "loss": 2.3054, "step": 2344 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.965271449161774e-05, "loss": 2.2811, "step": 2345 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.965236282617491e-05, "loss": 2.2933, "step": 2346 }, { "epoch": 0.31, "grad_norm": 0.318359375, "learning_rate": 4.965201098401848e-05, "loss": 2.2819, "step": 2347 }, { "epoch": 0.31, "grad_norm": 0.3203125, "learning_rate": 4.965165896515095e-05, "loss": 2.2543, "step": 2348 }, { "epoch": 0.31, "grad_norm": 0.3046875, "learning_rate": 4.965130676957484e-05, "loss": 2.3265, "step": 2349 }, { "epoch": 0.31, "grad_norm": 0.30859375, "learning_rate": 4.965095439729269e-05, "loss": 2.3087, "step": 2350 }, { "epoch": 0.31, "grad_norm": 0.296875, "learning_rate": 4.965060184830702e-05, "loss": 2.288, "step": 2351 }, { "epoch": 0.31, "grad_norm": 0.326171875, "learning_rate": 4.9650249122620354e-05, "loss": 2.3062, "step": 2352 }, { "epoch": 0.31, "grad_norm": 0.29296875, "learning_rate": 4.964989622023522e-05, "loss": 2.3105, "step": 2353 }, { "epoch": 0.31, "grad_norm": 0.302734375, "learning_rate": 4.9649543141154155e-05, "loss": 2.2729, "step": 2354 }, { "epoch": 0.31, "grad_norm": 0.294921875, "learning_rate": 4.9649189885379676e-05, "loss": 2.3433, "step": 2355 }, { "epoch": 0.31, "grad_norm": 0.2890625, "learning_rate": 4.964883645291434e-05, "loss": 2.3335, "step": 2356 }, { "epoch": 0.31, "grad_norm": 0.314453125, "learning_rate": 4.964848284376065e-05, "loss": 2.2927, "step": 2357 }, { "epoch": 0.31, "grad_norm": 0.271484375, "learning_rate": 4.9648129057921166e-05, "loss": 2.2965, "step": 2358 }, { "epoch": 0.31, "grad_norm": 0.298828125, "learning_rate": 4.964777509539841e-05, "loss": 2.3068, "step": 2359 }, { "epoch": 0.31, "grad_norm": 0.306640625, "learning_rate": 4.9647420956194925e-05, "loss": 2.2957, "step": 2360 }, { "epoch": 0.31, "grad_norm": 0.30078125, "learning_rate": 4.964706664031324e-05, "loss": 2.2898, "step": 2361 }, { "epoch": 0.32, "grad_norm": 0.3046875, "learning_rate": 4.964671214775591e-05, "loss": 2.3123, "step": 2362 }, { "epoch": 0.32, "grad_norm": 0.314453125, "learning_rate": 4.964635747852547e-05, "loss": 2.2488, "step": 2363 }, { "epoch": 0.32, "grad_norm": 0.296875, "learning_rate": 4.9646002632624464e-05, "loss": 2.2826, "step": 2364 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.964564761005542e-05, "loss": 2.2873, "step": 2365 }, { "epoch": 0.32, "grad_norm": 0.29296875, "learning_rate": 4.964529241082091e-05, "loss": 2.2893, "step": 2366 }, { "epoch": 0.32, "grad_norm": 0.33203125, "learning_rate": 4.964493703492346e-05, "loss": 2.3104, "step": 2367 }, { "epoch": 0.32, "grad_norm": 0.30859375, "learning_rate": 4.9644581482365616e-05, "loss": 2.2912, "step": 2368 }, { "epoch": 0.32, "grad_norm": 0.287109375, "learning_rate": 4.964422575314994e-05, "loss": 2.2981, "step": 2369 }, { "epoch": 0.32, "grad_norm": 0.294921875, "learning_rate": 4.9643869847278976e-05, "loss": 2.2987, "step": 2370 }, { "epoch": 0.32, "grad_norm": 0.306640625, "learning_rate": 4.9643513764755266e-05, "loss": 2.2758, "step": 2371 }, { "epoch": 0.32, "grad_norm": 0.287109375, "learning_rate": 4.9643157505581384e-05, "loss": 2.2703, "step": 2372 }, { "epoch": 0.32, "grad_norm": 0.310546875, "learning_rate": 4.964280106975986e-05, "loss": 2.281, "step": 2373 }, { "epoch": 0.32, "grad_norm": 0.29296875, "learning_rate": 4.964244445729327e-05, "loss": 2.2934, "step": 2374 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.964208766818416e-05, "loss": 2.2938, "step": 2375 }, { "epoch": 0.32, "grad_norm": 0.294921875, "learning_rate": 4.964173070243508e-05, "loss": 2.3152, "step": 2376 }, { "epoch": 0.32, "grad_norm": 0.29296875, "learning_rate": 4.96413735600486e-05, "loss": 2.3029, "step": 2377 }, { "epoch": 0.32, "grad_norm": 0.3359375, "learning_rate": 4.964101624102727e-05, "loss": 2.271, "step": 2378 }, { "epoch": 0.32, "grad_norm": 0.29296875, "learning_rate": 4.964065874537367e-05, "loss": 2.3212, "step": 2379 }, { "epoch": 0.32, "grad_norm": 0.283203125, "learning_rate": 4.964030107309035e-05, "loss": 2.2934, "step": 2380 }, { "epoch": 0.32, "grad_norm": 0.294921875, "learning_rate": 4.963994322417986e-05, "loss": 2.2873, "step": 2381 }, { "epoch": 0.32, "grad_norm": 0.283203125, "learning_rate": 4.9639585198644795e-05, "loss": 2.2785, "step": 2382 }, { "epoch": 0.32, "grad_norm": 0.294921875, "learning_rate": 4.9639226996487705e-05, "loss": 2.2963, "step": 2383 }, { "epoch": 0.32, "grad_norm": 0.30859375, "learning_rate": 4.963886861771115e-05, "loss": 2.2656, "step": 2384 }, { "epoch": 0.32, "grad_norm": 0.291015625, "learning_rate": 4.963851006231771e-05, "loss": 2.343, "step": 2385 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.963815133030996e-05, "loss": 2.2824, "step": 2386 }, { "epoch": 0.32, "grad_norm": 0.3046875, "learning_rate": 4.963779242169046e-05, "loss": 2.2908, "step": 2387 }, { "epoch": 0.32, "grad_norm": 0.28125, "learning_rate": 4.9637433336461785e-05, "loss": 2.3059, "step": 2388 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.963707407462652e-05, "loss": 2.2946, "step": 2389 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.963671463618722e-05, "loss": 2.3265, "step": 2390 }, { "epoch": 0.32, "grad_norm": 0.287109375, "learning_rate": 4.963635502114648e-05, "loss": 2.3035, "step": 2391 }, { "epoch": 0.32, "grad_norm": 0.3046875, "learning_rate": 4.963599522950687e-05, "loss": 2.2613, "step": 2392 }, { "epoch": 0.32, "grad_norm": 0.3046875, "learning_rate": 4.9635635261270964e-05, "loss": 2.2984, "step": 2393 }, { "epoch": 0.32, "grad_norm": 0.291015625, "learning_rate": 4.9635275116441356e-05, "loss": 2.2895, "step": 2394 }, { "epoch": 0.32, "grad_norm": 0.30078125, "learning_rate": 4.963491479502062e-05, "loss": 2.2923, "step": 2395 }, { "epoch": 0.32, "grad_norm": 0.298828125, "learning_rate": 4.9634554297011325e-05, "loss": 2.3039, "step": 2396 }, { "epoch": 0.32, "grad_norm": 0.296875, "learning_rate": 4.963419362241608e-05, "loss": 2.307, "step": 2397 }, { "epoch": 0.32, "grad_norm": 0.283203125, "learning_rate": 4.963383277123746e-05, "loss": 2.3056, "step": 2398 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.9633471743478046e-05, "loss": 2.2992, "step": 2399 }, { "epoch": 0.32, "grad_norm": 0.318359375, "learning_rate": 4.963311053914044e-05, "loss": 2.274, "step": 2400 }, { "epoch": 0.32, "grad_norm": 0.310546875, "learning_rate": 4.963274915822721e-05, "loss": 2.3046, "step": 2401 }, { "epoch": 0.32, "grad_norm": 0.30859375, "learning_rate": 4.963238760074096e-05, "loss": 2.3012, "step": 2402 }, { "epoch": 0.32, "grad_norm": 0.30859375, "learning_rate": 4.9632025866684285e-05, "loss": 2.2575, "step": 2403 }, { "epoch": 0.32, "grad_norm": 0.314453125, "learning_rate": 4.963166395605977e-05, "loss": 2.2646, "step": 2404 }, { "epoch": 0.32, "grad_norm": 0.310546875, "learning_rate": 4.9631301868870015e-05, "loss": 2.3048, "step": 2405 }, { "epoch": 0.32, "grad_norm": 0.337890625, "learning_rate": 4.963093960511761e-05, "loss": 2.296, "step": 2406 }, { "epoch": 0.32, "grad_norm": 0.3046875, "learning_rate": 4.963057716480516e-05, "loss": 2.296, "step": 2407 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.9630214547935256e-05, "loss": 2.2722, "step": 2408 }, { "epoch": 0.32, "grad_norm": 0.287109375, "learning_rate": 4.962985175451049e-05, "loss": 2.2797, "step": 2409 }, { "epoch": 0.32, "grad_norm": 0.30078125, "learning_rate": 4.9629488784533486e-05, "loss": 2.3091, "step": 2410 }, { "epoch": 0.32, "grad_norm": 0.318359375, "learning_rate": 4.962912563800682e-05, "loss": 2.2577, "step": 2411 }, { "epoch": 0.32, "grad_norm": 0.30859375, "learning_rate": 4.9628762314933115e-05, "loss": 2.2684, "step": 2412 }, { "epoch": 0.32, "grad_norm": 0.32421875, "learning_rate": 4.962839881531496e-05, "loss": 2.2555, "step": 2413 }, { "epoch": 0.32, "grad_norm": 0.310546875, "learning_rate": 4.962803513915497e-05, "loss": 2.3224, "step": 2414 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.962767128645575e-05, "loss": 2.3185, "step": 2415 }, { "epoch": 0.32, "grad_norm": 0.314453125, "learning_rate": 4.9627307257219916e-05, "loss": 2.269, "step": 2416 }, { "epoch": 0.32, "grad_norm": 0.310546875, "learning_rate": 4.962694305145006e-05, "loss": 2.2806, "step": 2417 }, { "epoch": 0.32, "grad_norm": 0.34765625, "learning_rate": 4.96265786691488e-05, "loss": 2.2903, "step": 2418 }, { "epoch": 0.32, "grad_norm": 0.298828125, "learning_rate": 4.9626214110318744e-05, "loss": 2.2471, "step": 2419 }, { "epoch": 0.32, "grad_norm": 0.306640625, "learning_rate": 4.962584937496252e-05, "loss": 2.308, "step": 2420 }, { "epoch": 0.32, "grad_norm": 0.287109375, "learning_rate": 4.962548446308274e-05, "loss": 2.2852, "step": 2421 }, { "epoch": 0.32, "grad_norm": 0.294921875, "learning_rate": 4.962511937468201e-05, "loss": 2.2904, "step": 2422 }, { "epoch": 0.32, "grad_norm": 0.296875, "learning_rate": 4.962475410976294e-05, "loss": 2.2873, "step": 2423 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.962438866832817e-05, "loss": 2.2839, "step": 2424 }, { "epoch": 0.32, "grad_norm": 0.3125, "learning_rate": 4.9624023050380294e-05, "loss": 2.2933, "step": 2425 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.9623657255921965e-05, "loss": 2.3229, "step": 2426 }, { "epoch": 0.32, "grad_norm": 0.30078125, "learning_rate": 4.962329128495577e-05, "loss": 2.3039, "step": 2427 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.962292513748437e-05, "loss": 2.3092, "step": 2428 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.962255881351036e-05, "loss": 2.2917, "step": 2429 }, { "epoch": 0.32, "grad_norm": 0.296875, "learning_rate": 4.9622192313036366e-05, "loss": 2.2961, "step": 2430 }, { "epoch": 0.32, "grad_norm": 0.298828125, "learning_rate": 4.9621825636065034e-05, "loss": 2.2963, "step": 2431 }, { "epoch": 0.32, "grad_norm": 0.296875, "learning_rate": 4.962145878259897e-05, "loss": 2.2884, "step": 2432 }, { "epoch": 0.32, "grad_norm": 0.28515625, "learning_rate": 4.962109175264083e-05, "loss": 2.2834, "step": 2433 }, { "epoch": 0.32, "grad_norm": 0.298828125, "learning_rate": 4.9620724546193234e-05, "loss": 2.3201, "step": 2434 }, { "epoch": 0.32, "grad_norm": 0.302734375, "learning_rate": 4.96203571632588e-05, "loss": 2.2739, "step": 2435 }, { "epoch": 0.32, "grad_norm": 0.2890625, "learning_rate": 4.961998960384018e-05, "loss": 2.2967, "step": 2436 }, { "epoch": 0.33, "grad_norm": 0.28515625, "learning_rate": 4.961962186793999e-05, "loss": 2.3121, "step": 2437 }, { "epoch": 0.33, "grad_norm": 0.2890625, "learning_rate": 4.961925395556089e-05, "loss": 2.2799, "step": 2438 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.9618885866705503e-05, "loss": 2.25, "step": 2439 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.961851760137647e-05, "loss": 2.2564, "step": 2440 }, { "epoch": 0.33, "grad_norm": 0.306640625, "learning_rate": 4.9618149159576426e-05, "loss": 2.2955, "step": 2441 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.961778054130802e-05, "loss": 2.2807, "step": 2442 }, { "epoch": 0.33, "grad_norm": 0.306640625, "learning_rate": 4.961741174657389e-05, "loss": 2.2633, "step": 2443 }, { "epoch": 0.33, "grad_norm": 0.314453125, "learning_rate": 4.9617042775376675e-05, "loss": 2.2421, "step": 2444 }, { "epoch": 0.33, "grad_norm": 0.294921875, "learning_rate": 4.9616673627719036e-05, "loss": 2.2808, "step": 2445 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.96163043036036e-05, "loss": 2.2739, "step": 2446 }, { "epoch": 0.33, "grad_norm": 0.283203125, "learning_rate": 4.961593480303303e-05, "loss": 2.3052, "step": 2447 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.961556512600996e-05, "loss": 2.2809, "step": 2448 }, { "epoch": 0.33, "grad_norm": 0.291015625, "learning_rate": 4.961519527253705e-05, "loss": 2.273, "step": 2449 }, { "epoch": 0.33, "grad_norm": 0.302734375, "learning_rate": 4.961482524261695e-05, "loss": 2.2762, "step": 2450 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.9614455036252306e-05, "loss": 2.2893, "step": 2451 }, { "epoch": 0.33, "grad_norm": 0.279296875, "learning_rate": 4.961408465344578e-05, "loss": 2.2899, "step": 2452 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.9613714094200025e-05, "loss": 2.2937, "step": 2453 }, { "epoch": 0.33, "grad_norm": 0.3046875, "learning_rate": 4.961334335851769e-05, "loss": 2.2933, "step": 2454 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.961297244640145e-05, "loss": 2.3286, "step": 2455 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.961260135785395e-05, "loss": 2.304, "step": 2456 }, { "epoch": 0.33, "grad_norm": 0.30859375, "learning_rate": 4.961223009287784e-05, "loss": 2.3075, "step": 2457 }, { "epoch": 0.33, "grad_norm": 0.3125, "learning_rate": 4.9611858651475805e-05, "loss": 2.2745, "step": 2458 }, { "epoch": 0.33, "grad_norm": 0.310546875, "learning_rate": 4.961148703365049e-05, "loss": 2.2907, "step": 2459 }, { "epoch": 0.33, "grad_norm": 0.294921875, "learning_rate": 4.961111523940457e-05, "loss": 2.2612, "step": 2460 }, { "epoch": 0.33, "grad_norm": 0.30859375, "learning_rate": 4.9610743268740705e-05, "loss": 2.2663, "step": 2461 }, { "epoch": 0.33, "grad_norm": 0.28515625, "learning_rate": 4.961037112166156e-05, "loss": 2.3134, "step": 2462 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.96099987981698e-05, "loss": 2.2761, "step": 2463 }, { "epoch": 0.33, "grad_norm": 0.306640625, "learning_rate": 4.960962629826811e-05, "loss": 2.2744, "step": 2464 }, { "epoch": 0.33, "grad_norm": 0.291015625, "learning_rate": 4.960925362195913e-05, "loss": 2.3178, "step": 2465 }, { "epoch": 0.33, "grad_norm": 0.330078125, "learning_rate": 4.9608880769245565e-05, "loss": 2.2856, "step": 2466 }, { "epoch": 0.33, "grad_norm": 0.3046875, "learning_rate": 4.960850774013006e-05, "loss": 2.3012, "step": 2467 }, { "epoch": 0.33, "grad_norm": 0.298828125, "learning_rate": 4.960813453461531e-05, "loss": 2.2899, "step": 2468 }, { "epoch": 0.33, "grad_norm": 0.318359375, "learning_rate": 4.960776115270398e-05, "loss": 2.2777, "step": 2469 }, { "epoch": 0.33, "grad_norm": 0.3125, "learning_rate": 4.9607387594398743e-05, "loss": 2.3069, "step": 2470 }, { "epoch": 0.33, "grad_norm": 0.29296875, "learning_rate": 4.960701385970229e-05, "loss": 2.2526, "step": 2471 }, { "epoch": 0.33, "grad_norm": 0.3125, "learning_rate": 4.960663994861729e-05, "loss": 2.279, "step": 2472 }, { "epoch": 0.33, "grad_norm": 0.333984375, "learning_rate": 4.9606265861146424e-05, "loss": 2.29, "step": 2473 }, { "epoch": 0.33, "grad_norm": 0.298828125, "learning_rate": 4.960589159729238e-05, "loss": 2.277, "step": 2474 }, { "epoch": 0.33, "grad_norm": 0.3046875, "learning_rate": 4.960551715705782e-05, "loss": 2.3068, "step": 2475 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.9605142540445454e-05, "loss": 2.2908, "step": 2476 }, { "epoch": 0.33, "grad_norm": 0.3125, "learning_rate": 4.9604767747457956e-05, "loss": 2.2751, "step": 2477 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.9604392778098005e-05, "loss": 2.2595, "step": 2478 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.960401763236831e-05, "loss": 2.2716, "step": 2479 }, { "epoch": 0.33, "grad_norm": 0.302734375, "learning_rate": 4.9603642310271545e-05, "loss": 2.2813, "step": 2480 }, { "epoch": 0.33, "grad_norm": 0.322265625, "learning_rate": 4.96032668118104e-05, "loss": 2.3054, "step": 2481 }, { "epoch": 0.33, "grad_norm": 0.31640625, "learning_rate": 4.960289113698756e-05, "loss": 2.2461, "step": 2482 }, { "epoch": 0.33, "grad_norm": 0.310546875, "learning_rate": 4.960251528580574e-05, "loss": 2.2895, "step": 2483 }, { "epoch": 0.33, "grad_norm": 0.275390625, "learning_rate": 4.960213925826762e-05, "loss": 2.2615, "step": 2484 }, { "epoch": 0.33, "grad_norm": 0.291015625, "learning_rate": 4.960176305437589e-05, "loss": 2.292, "step": 2485 }, { "epoch": 0.33, "grad_norm": 0.3046875, "learning_rate": 4.960138667413326e-05, "loss": 2.2833, "step": 2486 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.9601010117542426e-05, "loss": 2.305, "step": 2487 }, { "epoch": 0.33, "grad_norm": 0.30859375, "learning_rate": 4.960063338460607e-05, "loss": 2.2602, "step": 2488 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.960025647532691e-05, "loss": 2.2884, "step": 2489 }, { "epoch": 0.33, "grad_norm": 0.326171875, "learning_rate": 4.959987938970765e-05, "loss": 2.3133, "step": 2490 }, { "epoch": 0.33, "grad_norm": 0.294921875, "learning_rate": 4.959950212775098e-05, "loss": 2.313, "step": 2491 }, { "epoch": 0.33, "grad_norm": 0.3046875, "learning_rate": 4.959912468945961e-05, "loss": 2.2842, "step": 2492 }, { "epoch": 0.33, "grad_norm": 0.279296875, "learning_rate": 4.959874707483625e-05, "loss": 2.2785, "step": 2493 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.95983692838836e-05, "loss": 2.2799, "step": 2494 }, { "epoch": 0.33, "grad_norm": 0.291015625, "learning_rate": 4.959799131660437e-05, "loss": 2.2938, "step": 2495 }, { "epoch": 0.33, "grad_norm": 0.306640625, "learning_rate": 4.959761317300128e-05, "loss": 2.2975, "step": 2496 }, { "epoch": 0.33, "grad_norm": 0.28125, "learning_rate": 4.9597234853077015e-05, "loss": 2.2533, "step": 2497 }, { "epoch": 0.33, "grad_norm": 0.302734375, "learning_rate": 4.959685635683431e-05, "loss": 2.3033, "step": 2498 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.959647768427588e-05, "loss": 2.2919, "step": 2499 }, { "epoch": 0.33, "grad_norm": 0.28125, "learning_rate": 4.959609883540442e-05, "loss": 2.284, "step": 2500 }, { "epoch": 0.33, "grad_norm": 0.28515625, "learning_rate": 4.959571981022266e-05, "loss": 2.2775, "step": 2501 }, { "epoch": 0.33, "grad_norm": 0.296875, "learning_rate": 4.959534060873331e-05, "loss": 2.304, "step": 2502 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.959496123093909e-05, "loss": 2.3085, "step": 2503 }, { "epoch": 0.33, "grad_norm": 0.30078125, "learning_rate": 4.959458167684272e-05, "loss": 2.2811, "step": 2504 }, { "epoch": 0.33, "grad_norm": 0.28515625, "learning_rate": 4.959420194644693e-05, "loss": 2.2817, "step": 2505 }, { "epoch": 0.33, "grad_norm": 0.302734375, "learning_rate": 4.959382203975442e-05, "loss": 2.2987, "step": 2506 }, { "epoch": 0.33, "grad_norm": 0.27734375, "learning_rate": 4.959344195676794e-05, "loss": 2.2521, "step": 2507 }, { "epoch": 0.33, "grad_norm": 0.30859375, "learning_rate": 4.959306169749019e-05, "loss": 2.2909, "step": 2508 }, { "epoch": 0.33, "grad_norm": 0.287109375, "learning_rate": 4.959268126192392e-05, "loss": 2.279, "step": 2509 }, { "epoch": 0.33, "grad_norm": 0.291015625, "learning_rate": 4.959230065007183e-05, "loss": 2.2679, "step": 2510 }, { "epoch": 0.33, "grad_norm": 0.294921875, "learning_rate": 4.9591919861936676e-05, "loss": 2.3045, "step": 2511 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.9591538897521164e-05, "loss": 2.2982, "step": 2512 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.9591157756828034e-05, "loss": 2.2803, "step": 2513 }, { "epoch": 0.34, "grad_norm": 0.291015625, "learning_rate": 4.959077643986002e-05, "loss": 2.2928, "step": 2514 }, { "epoch": 0.34, "grad_norm": 0.28515625, "learning_rate": 4.959039494661985e-05, "loss": 2.3069, "step": 2515 }, { "epoch": 0.34, "grad_norm": 0.283203125, "learning_rate": 4.959001327711028e-05, "loss": 2.2884, "step": 2516 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.9589631431334014e-05, "loss": 2.313, "step": 2517 }, { "epoch": 0.34, "grad_norm": 0.326171875, "learning_rate": 4.958924940929381e-05, "loss": 2.267, "step": 2518 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 4.9588867210992396e-05, "loss": 2.2718, "step": 2519 }, { "epoch": 0.34, "grad_norm": 0.302734375, "learning_rate": 4.958848483643252e-05, "loss": 2.2815, "step": 2520 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.9588102285616914e-05, "loss": 2.2798, "step": 2521 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.958771955854833e-05, "loss": 2.2958, "step": 2522 }, { "epoch": 0.34, "grad_norm": 0.294921875, "learning_rate": 4.9587336655229496e-05, "loss": 2.2748, "step": 2523 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.9586953575663184e-05, "loss": 2.3034, "step": 2524 }, { "epoch": 0.34, "grad_norm": 0.3125, "learning_rate": 4.958657031985211e-05, "loss": 2.3028, "step": 2525 }, { "epoch": 0.34, "grad_norm": 1.6796875, "learning_rate": 4.958618688779904e-05, "loss": 2.2888, "step": 2526 }, { "epoch": 0.34, "grad_norm": 0.296875, "learning_rate": 4.9585803279506714e-05, "loss": 2.3002, "step": 2527 }, { "epoch": 0.34, "grad_norm": 0.31640625, "learning_rate": 4.958541949497789e-05, "loss": 2.3051, "step": 2528 }, { "epoch": 0.34, "grad_norm": 0.3203125, "learning_rate": 4.9585035534215314e-05, "loss": 2.309, "step": 2529 }, { "epoch": 0.34, "grad_norm": 0.3125, "learning_rate": 4.958465139722174e-05, "loss": 2.2496, "step": 2530 }, { "epoch": 0.34, "grad_norm": 0.30078125, "learning_rate": 4.9584267083999914e-05, "loss": 2.3226, "step": 2531 }, { "epoch": 0.34, "grad_norm": 0.310546875, "learning_rate": 4.9583882594552594e-05, "loss": 2.2921, "step": 2532 }, { "epoch": 0.34, "grad_norm": 0.314453125, "learning_rate": 4.9583497928882536e-05, "loss": 2.2712, "step": 2533 }, { "epoch": 0.34, "grad_norm": 0.3125, "learning_rate": 4.958311308699252e-05, "loss": 2.2963, "step": 2534 }, { "epoch": 0.34, "grad_norm": 0.30859375, "learning_rate": 4.9582728068885265e-05, "loss": 2.2979, "step": 2535 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.958234287456356e-05, "loss": 2.2851, "step": 2536 }, { "epoch": 0.34, "grad_norm": 0.314453125, "learning_rate": 4.958195750403015e-05, "loss": 2.2867, "step": 2537 }, { "epoch": 0.34, "grad_norm": 0.3125, "learning_rate": 4.958157195728781e-05, "loss": 2.2798, "step": 2538 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.9581186234339296e-05, "loss": 2.3028, "step": 2539 }, { "epoch": 0.34, "grad_norm": 0.30078125, "learning_rate": 4.9580800335187374e-05, "loss": 2.2804, "step": 2540 }, { "epoch": 0.34, "grad_norm": 0.302734375, "learning_rate": 4.958041425983481e-05, "loss": 2.2899, "step": 2541 }, { "epoch": 0.34, "grad_norm": 0.30859375, "learning_rate": 4.9580028008284375e-05, "loss": 2.2544, "step": 2542 }, { "epoch": 0.34, "grad_norm": 0.296875, "learning_rate": 4.957964158053884e-05, "loss": 2.2997, "step": 2543 }, { "epoch": 0.34, "grad_norm": 0.298828125, "learning_rate": 4.9579254976600964e-05, "loss": 2.3012, "step": 2544 }, { "epoch": 0.34, "grad_norm": 0.3203125, "learning_rate": 4.957886819647353e-05, "loss": 2.2895, "step": 2545 }, { "epoch": 0.34, "grad_norm": 0.291015625, "learning_rate": 4.957848124015929e-05, "loss": 2.2804, "step": 2546 }, { "epoch": 0.34, "grad_norm": 0.28515625, "learning_rate": 4.957809410766105e-05, "loss": 2.2715, "step": 2547 }, { "epoch": 0.34, "grad_norm": 0.310546875, "learning_rate": 4.957770679898156e-05, "loss": 2.2677, "step": 2548 }, { "epoch": 0.34, "grad_norm": 0.306640625, "learning_rate": 4.957731931412361e-05, "loss": 2.2623, "step": 2549 }, { "epoch": 0.34, "grad_norm": 0.3203125, "learning_rate": 4.957693165308996e-05, "loss": 2.2945, "step": 2550 }, { "epoch": 0.34, "grad_norm": 0.27734375, "learning_rate": 4.957654381588341e-05, "loss": 2.2545, "step": 2551 }, { "epoch": 0.34, "grad_norm": 0.30859375, "learning_rate": 4.957615580250673e-05, "loss": 2.2961, "step": 2552 }, { "epoch": 0.34, "grad_norm": 0.30078125, "learning_rate": 4.95757676129627e-05, "loss": 2.2902, "step": 2553 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 4.9575379247254103e-05, "loss": 2.2831, "step": 2554 }, { "epoch": 0.34, "grad_norm": 0.314453125, "learning_rate": 4.9574990705383734e-05, "loss": 2.293, "step": 2555 }, { "epoch": 0.34, "grad_norm": 0.294921875, "learning_rate": 4.9574601987354364e-05, "loss": 2.2801, "step": 2556 }, { "epoch": 0.34, "grad_norm": 0.2890625, "learning_rate": 4.957421309316878e-05, "loss": 2.2983, "step": 2557 }, { "epoch": 0.34, "grad_norm": 0.296875, "learning_rate": 4.9573824022829776e-05, "loss": 2.2981, "step": 2558 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.957343477634014e-05, "loss": 2.3039, "step": 2559 }, { "epoch": 0.34, "grad_norm": 0.302734375, "learning_rate": 4.957304535370266e-05, "loss": 2.2876, "step": 2560 }, { "epoch": 0.34, "grad_norm": 0.310546875, "learning_rate": 4.957265575492013e-05, "loss": 2.303, "step": 2561 }, { "epoch": 0.34, "grad_norm": 0.3046875, "learning_rate": 4.957226597999534e-05, "loss": 2.2454, "step": 2562 }, { "epoch": 0.34, "grad_norm": 0.306640625, "learning_rate": 4.957187602893109e-05, "loss": 2.3052, "step": 2563 }, { "epoch": 0.34, "grad_norm": 0.298828125, "learning_rate": 4.9571485901730164e-05, "loss": 2.2931, "step": 2564 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.957109559839537e-05, "loss": 2.2524, "step": 2565 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.9570705118929494e-05, "loss": 2.304, "step": 2566 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 4.9570314463335344e-05, "loss": 2.2727, "step": 2567 }, { "epoch": 0.34, "grad_norm": 0.310546875, "learning_rate": 4.956992363161572e-05, "loss": 2.2934, "step": 2568 }, { "epoch": 0.34, "grad_norm": 0.306640625, "learning_rate": 4.956953262377342e-05, "loss": 2.2832, "step": 2569 }, { "epoch": 0.34, "grad_norm": 0.30859375, "learning_rate": 4.956914143981125e-05, "loss": 2.2785, "step": 2570 }, { "epoch": 0.34, "grad_norm": 0.322265625, "learning_rate": 4.956875007973202e-05, "loss": 2.2991, "step": 2571 }, { "epoch": 0.34, "grad_norm": 0.29296875, "learning_rate": 4.956835854353852e-05, "loss": 2.2835, "step": 2572 }, { "epoch": 0.34, "grad_norm": 0.337890625, "learning_rate": 4.9567966831233564e-05, "loss": 2.2603, "step": 2573 }, { "epoch": 0.34, "grad_norm": 0.30078125, "learning_rate": 4.956757494281996e-05, "loss": 2.2805, "step": 2574 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 4.9567182878300524e-05, "loss": 2.3145, "step": 2575 }, { "epoch": 0.34, "grad_norm": 0.302734375, "learning_rate": 4.956679063767805e-05, "loss": 2.3061, "step": 2576 }, { "epoch": 0.34, "grad_norm": 0.294921875, "learning_rate": 4.956639822095537e-05, "loss": 2.2927, "step": 2577 }, { "epoch": 0.34, "grad_norm": 0.283203125, "learning_rate": 4.956600562813528e-05, "loss": 2.2784, "step": 2578 }, { "epoch": 0.34, "grad_norm": 0.310546875, "learning_rate": 4.95656128592206e-05, "loss": 2.2909, "step": 2579 }, { "epoch": 0.34, "grad_norm": 0.287109375, "learning_rate": 4.9565219914214156e-05, "loss": 2.2813, "step": 2580 }, { "epoch": 0.34, "grad_norm": 0.31640625, "learning_rate": 4.9564826793118746e-05, "loss": 2.3182, "step": 2581 }, { "epoch": 0.34, "grad_norm": 0.2890625, "learning_rate": 4.95644334959372e-05, "loss": 2.2693, "step": 2582 }, { "epoch": 0.34, "grad_norm": 0.291015625, "learning_rate": 4.956404002267233e-05, "loss": 2.2969, "step": 2583 }, { "epoch": 0.34, "grad_norm": 0.294921875, "learning_rate": 4.9563646373326966e-05, "loss": 2.291, "step": 2584 }, { "epoch": 0.34, "grad_norm": 1.65625, "learning_rate": 4.956325254790392e-05, "loss": 2.2856, "step": 2585 }, { "epoch": 0.34, "grad_norm": 0.2890625, "learning_rate": 4.9562858546406024e-05, "loss": 2.2837, "step": 2586 }, { "epoch": 0.35, "grad_norm": 0.322265625, "learning_rate": 4.956246436883609e-05, "loss": 2.2635, "step": 2587 }, { "epoch": 0.35, "grad_norm": 0.31640625, "learning_rate": 4.956207001519696e-05, "loss": 2.2782, "step": 2588 }, { "epoch": 0.35, "grad_norm": 0.302734375, "learning_rate": 4.956167548549146e-05, "loss": 2.2758, "step": 2589 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.95612807797224e-05, "loss": 2.3113, "step": 2590 }, { "epoch": 0.35, "grad_norm": 0.3359375, "learning_rate": 4.9560885897892614e-05, "loss": 2.2612, "step": 2591 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.956049084000494e-05, "loss": 2.2915, "step": 2592 }, { "epoch": 0.35, "grad_norm": 0.3046875, "learning_rate": 4.956009560606221e-05, "loss": 2.3082, "step": 2593 }, { "epoch": 0.35, "grad_norm": 0.3125, "learning_rate": 4.955970019606726e-05, "loss": 2.2815, "step": 2594 }, { "epoch": 0.35, "grad_norm": 0.31640625, "learning_rate": 4.955930461002292e-05, "loss": 2.2947, "step": 2595 }, { "epoch": 0.35, "grad_norm": 0.310546875, "learning_rate": 4.9558908847932014e-05, "loss": 2.2886, "step": 2596 }, { "epoch": 0.35, "grad_norm": 0.318359375, "learning_rate": 4.955851290979741e-05, "loss": 2.2941, "step": 2597 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.955811679562191e-05, "loss": 2.2726, "step": 2598 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.955772050540837e-05, "loss": 2.3304, "step": 2599 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.9557324039159634e-05, "loss": 2.3095, "step": 2600 }, { "epoch": 0.35, "grad_norm": 0.310546875, "learning_rate": 4.955692739687854e-05, "loss": 2.2602, "step": 2601 }, { "epoch": 0.35, "grad_norm": 0.3203125, "learning_rate": 4.9556530578567936e-05, "loss": 2.2817, "step": 2602 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.955613358423066e-05, "loss": 2.3148, "step": 2603 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.955573641386955e-05, "loss": 2.2664, "step": 2604 }, { "epoch": 0.35, "grad_norm": 0.30859375, "learning_rate": 4.9555339067487475e-05, "loss": 2.2675, "step": 2605 }, { "epoch": 0.35, "grad_norm": 0.322265625, "learning_rate": 4.9554941545087265e-05, "loss": 2.2954, "step": 2606 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.955454384667178e-05, "loss": 2.3038, "step": 2607 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.955414597224387e-05, "loss": 2.272, "step": 2608 }, { "epoch": 0.35, "grad_norm": 0.31640625, "learning_rate": 4.955374792180637e-05, "loss": 2.3054, "step": 2609 }, { "epoch": 0.35, "grad_norm": 0.287109375, "learning_rate": 4.9553349695362156e-05, "loss": 2.2829, "step": 2610 }, { "epoch": 0.35, "grad_norm": 0.30859375, "learning_rate": 4.9552951292914074e-05, "loss": 2.2822, "step": 2611 }, { "epoch": 0.35, "grad_norm": 0.294921875, "learning_rate": 4.955255271446497e-05, "loss": 2.3016, "step": 2612 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.9552153960017725e-05, "loss": 2.2834, "step": 2613 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.955175502957518e-05, "loss": 2.2836, "step": 2614 }, { "epoch": 0.35, "grad_norm": 0.29296875, "learning_rate": 4.9551355923140184e-05, "loss": 2.282, "step": 2615 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.9550956640715616e-05, "loss": 2.2945, "step": 2616 }, { "epoch": 0.35, "grad_norm": 0.291015625, "learning_rate": 4.955055718230434e-05, "loss": 2.3108, "step": 2617 }, { "epoch": 0.35, "grad_norm": 0.291015625, "learning_rate": 4.955015754790921e-05, "loss": 2.3157, "step": 2618 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.9549757737533095e-05, "loss": 2.2884, "step": 2619 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.954935775117886e-05, "loss": 2.2898, "step": 2620 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.954895758884937e-05, "loss": 2.2764, "step": 2621 }, { "epoch": 0.35, "grad_norm": 0.27734375, "learning_rate": 4.954855725054749e-05, "loss": 2.3007, "step": 2622 }, { "epoch": 0.35, "grad_norm": 0.287109375, "learning_rate": 4.95481567362761e-05, "loss": 2.2891, "step": 2623 }, { "epoch": 0.35, "grad_norm": 0.318359375, "learning_rate": 4.954775604603806e-05, "loss": 2.2999, "step": 2624 }, { "epoch": 0.35, "grad_norm": 0.302734375, "learning_rate": 4.954735517983625e-05, "loss": 2.2797, "step": 2625 }, { "epoch": 0.35, "grad_norm": 0.298828125, "learning_rate": 4.954695413767355e-05, "loss": 2.2852, "step": 2626 }, { "epoch": 0.35, "grad_norm": 0.291015625, "learning_rate": 4.954655291955282e-05, "loss": 2.3382, "step": 2627 }, { "epoch": 0.35, "grad_norm": 0.29296875, "learning_rate": 4.9546151525476936e-05, "loss": 2.3063, "step": 2628 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.95457499554488e-05, "loss": 2.3138, "step": 2629 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.9545348209471254e-05, "loss": 2.3005, "step": 2630 }, { "epoch": 0.35, "grad_norm": 0.3125, "learning_rate": 4.95449462875472e-05, "loss": 2.2619, "step": 2631 }, { "epoch": 0.35, "grad_norm": 0.298828125, "learning_rate": 4.954454418967951e-05, "loss": 2.2762, "step": 2632 }, { "epoch": 0.35, "grad_norm": 0.27734375, "learning_rate": 4.9544141915871075e-05, "loss": 2.2762, "step": 2633 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.9543739466124784e-05, "loss": 2.2639, "step": 2634 }, { "epoch": 0.35, "grad_norm": 0.291015625, "learning_rate": 4.954333684044351e-05, "loss": 2.2682, "step": 2635 }, { "epoch": 0.35, "grad_norm": 0.283203125, "learning_rate": 4.954293403883013e-05, "loss": 2.262, "step": 2636 }, { "epoch": 0.35, "grad_norm": 0.29296875, "learning_rate": 4.954253106128755e-05, "loss": 2.3146, "step": 2637 }, { "epoch": 0.35, "grad_norm": 0.298828125, "learning_rate": 4.9542127907818656e-05, "loss": 2.2779, "step": 2638 }, { "epoch": 0.35, "grad_norm": 0.30859375, "learning_rate": 4.954172457842633e-05, "loss": 2.2824, "step": 2639 }, { "epoch": 0.35, "grad_norm": 0.3046875, "learning_rate": 4.954132107311347e-05, "loss": 2.283, "step": 2640 }, { "epoch": 0.35, "grad_norm": 0.30859375, "learning_rate": 4.9540917391882956e-05, "loss": 2.2774, "step": 2641 }, { "epoch": 0.35, "grad_norm": 0.3046875, "learning_rate": 4.95405135347377e-05, "loss": 2.2864, "step": 2642 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.954010950168058e-05, "loss": 2.3245, "step": 2643 }, { "epoch": 0.35, "grad_norm": 0.28515625, "learning_rate": 4.9539705292714505e-05, "loss": 2.3133, "step": 2644 }, { "epoch": 0.35, "grad_norm": 0.296875, "learning_rate": 4.9539300907842366e-05, "loss": 2.2904, "step": 2645 }, { "epoch": 0.35, "grad_norm": 0.30078125, "learning_rate": 4.9538896347067064e-05, "loss": 2.2855, "step": 2646 }, { "epoch": 0.35, "grad_norm": 0.310546875, "learning_rate": 4.95384916103915e-05, "loss": 2.2681, "step": 2647 }, { "epoch": 0.35, "grad_norm": 0.298828125, "learning_rate": 4.9538086697818574e-05, "loss": 2.2954, "step": 2648 }, { "epoch": 0.35, "grad_norm": 0.28515625, "learning_rate": 4.9537681609351186e-05, "loss": 2.2286, "step": 2649 }, { "epoch": 0.35, "grad_norm": 0.3046875, "learning_rate": 4.953727634499224e-05, "loss": 2.2896, "step": 2650 }, { "epoch": 0.35, "grad_norm": 0.291015625, "learning_rate": 4.953687090474465e-05, "loss": 2.278, "step": 2651 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.9536465288611305e-05, "loss": 2.314, "step": 2652 }, { "epoch": 0.35, "grad_norm": 0.31640625, "learning_rate": 4.9536059496595125e-05, "loss": 2.2972, "step": 2653 }, { "epoch": 0.35, "grad_norm": 0.3046875, "learning_rate": 4.9535653528699025e-05, "loss": 2.2894, "step": 2654 }, { "epoch": 0.35, "grad_norm": 0.3125, "learning_rate": 4.95352473849259e-05, "loss": 2.2707, "step": 2655 }, { "epoch": 0.35, "grad_norm": 0.322265625, "learning_rate": 4.953484106527866e-05, "loss": 2.2862, "step": 2656 }, { "epoch": 0.35, "grad_norm": 0.326171875, "learning_rate": 4.953443456976023e-05, "loss": 2.2486, "step": 2657 }, { "epoch": 0.35, "grad_norm": 0.306640625, "learning_rate": 4.953402789837352e-05, "loss": 2.2617, "step": 2658 }, { "epoch": 0.35, "grad_norm": 0.28125, "learning_rate": 4.9533621051121454e-05, "loss": 2.2564, "step": 2659 }, { "epoch": 0.35, "grad_norm": 0.287109375, "learning_rate": 4.953321402800693e-05, "loss": 2.247, "step": 2660 }, { "epoch": 0.35, "grad_norm": 0.33203125, "learning_rate": 4.953280682903287e-05, "loss": 2.2738, "step": 2661 }, { "epoch": 0.36, "grad_norm": 0.318359375, "learning_rate": 4.9532399454202205e-05, "loss": 2.2986, "step": 2662 }, { "epoch": 0.36, "grad_norm": 0.3203125, "learning_rate": 4.953199190351785e-05, "loss": 2.2721, "step": 2663 }, { "epoch": 0.36, "grad_norm": 0.314453125, "learning_rate": 4.953158417698272e-05, "loss": 2.2835, "step": 2664 }, { "epoch": 0.36, "grad_norm": 0.291015625, "learning_rate": 4.953117627459973e-05, "loss": 2.3019, "step": 2665 }, { "epoch": 0.36, "grad_norm": 0.306640625, "learning_rate": 4.953076819637183e-05, "loss": 2.2602, "step": 2666 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.953035994230193e-05, "loss": 2.306, "step": 2667 }, { "epoch": 0.36, "grad_norm": 0.287109375, "learning_rate": 4.9529951512392945e-05, "loss": 2.2816, "step": 2668 }, { "epoch": 0.36, "grad_norm": 0.3046875, "learning_rate": 4.952954290664783e-05, "loss": 2.2571, "step": 2669 }, { "epoch": 0.36, "grad_norm": 0.326171875, "learning_rate": 4.952913412506949e-05, "loss": 2.2691, "step": 2670 }, { "epoch": 0.36, "grad_norm": 0.328125, "learning_rate": 4.952872516766087e-05, "loss": 2.3079, "step": 2671 }, { "epoch": 0.36, "grad_norm": 0.30859375, "learning_rate": 4.9528316034424885e-05, "loss": 2.2362, "step": 2672 }, { "epoch": 0.36, "grad_norm": 0.302734375, "learning_rate": 4.9527906725364485e-05, "loss": 2.3112, "step": 2673 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.95274972404826e-05, "loss": 2.2993, "step": 2674 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.952708757978215e-05, "loss": 2.29, "step": 2675 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.9526677743266104e-05, "loss": 2.3001, "step": 2676 }, { "epoch": 0.36, "grad_norm": 0.341796875, "learning_rate": 4.952626773093736e-05, "loss": 2.2832, "step": 2677 }, { "epoch": 0.36, "grad_norm": 0.30859375, "learning_rate": 4.9525857542798894e-05, "loss": 2.3275, "step": 2678 }, { "epoch": 0.36, "grad_norm": 0.283203125, "learning_rate": 4.9525447178853616e-05, "loss": 2.2882, "step": 2679 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.952503663910449e-05, "loss": 2.2969, "step": 2680 }, { "epoch": 0.36, "grad_norm": 0.306640625, "learning_rate": 4.9524625923554444e-05, "loss": 2.2798, "step": 2681 }, { "epoch": 0.36, "grad_norm": 0.3046875, "learning_rate": 4.9524215032206426e-05, "loss": 2.2768, "step": 2682 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.952380396506339e-05, "loss": 2.2585, "step": 2683 }, { "epoch": 0.36, "grad_norm": 0.30078125, "learning_rate": 4.952339272212827e-05, "loss": 2.2594, "step": 2684 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.9522981303404025e-05, "loss": 2.2834, "step": 2685 }, { "epoch": 0.36, "grad_norm": 0.298828125, "learning_rate": 4.95225697088936e-05, "loss": 2.2846, "step": 2686 }, { "epoch": 0.36, "grad_norm": 0.287109375, "learning_rate": 4.9522157938599934e-05, "loss": 2.3139, "step": 2687 }, { "epoch": 0.36, "grad_norm": 0.306640625, "learning_rate": 4.9521745992525994e-05, "loss": 2.3028, "step": 2688 }, { "epoch": 0.36, "grad_norm": 0.30078125, "learning_rate": 4.952133387067472e-05, "loss": 2.3044, "step": 2689 }, { "epoch": 0.36, "grad_norm": 0.28125, "learning_rate": 4.952092157304909e-05, "loss": 2.2638, "step": 2690 }, { "epoch": 0.36, "grad_norm": 0.279296875, "learning_rate": 4.9520509099652036e-05, "loss": 2.286, "step": 2691 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.952009645048652e-05, "loss": 2.2902, "step": 2692 }, { "epoch": 0.36, "grad_norm": 0.3203125, "learning_rate": 4.95196836255555e-05, "loss": 2.2658, "step": 2693 }, { "epoch": 0.36, "grad_norm": 0.306640625, "learning_rate": 4.951927062486195e-05, "loss": 2.2964, "step": 2694 }, { "epoch": 0.36, "grad_norm": 0.302734375, "learning_rate": 4.951885744840881e-05, "loss": 2.2954, "step": 2695 }, { "epoch": 0.36, "grad_norm": 0.287109375, "learning_rate": 4.951844409619905e-05, "loss": 2.3085, "step": 2696 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.951803056823562e-05, "loss": 2.2679, "step": 2697 }, { "epoch": 0.36, "grad_norm": 0.283203125, "learning_rate": 4.951761686452151e-05, "loss": 2.3018, "step": 2698 }, { "epoch": 0.36, "grad_norm": 0.298828125, "learning_rate": 4.9517202985059676e-05, "loss": 2.2555, "step": 2699 }, { "epoch": 0.36, "grad_norm": 0.287109375, "learning_rate": 4.951678892985307e-05, "loss": 2.3221, "step": 2700 }, { "epoch": 0.36, "grad_norm": 0.3046875, "learning_rate": 4.951637469890468e-05, "loss": 2.3153, "step": 2701 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.951596029221746e-05, "loss": 2.2522, "step": 2702 }, { "epoch": 0.36, "grad_norm": 0.28515625, "learning_rate": 4.9515545709794386e-05, "loss": 2.274, "step": 2703 }, { "epoch": 0.36, "grad_norm": 0.28515625, "learning_rate": 4.951513095163843e-05, "loss": 2.2829, "step": 2704 }, { "epoch": 0.36, "grad_norm": 0.30078125, "learning_rate": 4.951471601775257e-05, "loss": 2.2864, "step": 2705 }, { "epoch": 0.36, "grad_norm": 0.2890625, "learning_rate": 4.9514300908139776e-05, "loss": 2.3143, "step": 2706 }, { "epoch": 0.36, "grad_norm": 0.28125, "learning_rate": 4.9513885622803024e-05, "loss": 2.2866, "step": 2707 }, { "epoch": 0.36, "grad_norm": 0.26953125, "learning_rate": 4.951347016174529e-05, "loss": 2.286, "step": 2708 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.951305452496955e-05, "loss": 2.2928, "step": 2709 }, { "epoch": 0.36, "grad_norm": 0.291015625, "learning_rate": 4.95126387124788e-05, "loss": 2.2751, "step": 2710 }, { "epoch": 0.36, "grad_norm": 0.298828125, "learning_rate": 4.951222272427599e-05, "loss": 2.2825, "step": 2711 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.9511806560364125e-05, "loss": 2.2842, "step": 2712 }, { "epoch": 0.36, "grad_norm": 0.3125, "learning_rate": 4.951139022074618e-05, "loss": 2.2859, "step": 2713 }, { "epoch": 0.36, "grad_norm": 0.310546875, "learning_rate": 4.9510973705425146e-05, "loss": 2.316, "step": 2714 }, { "epoch": 0.36, "grad_norm": 0.3046875, "learning_rate": 4.9510557014403994e-05, "loss": 2.2831, "step": 2715 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.951014014768573e-05, "loss": 2.2813, "step": 2716 }, { "epoch": 0.36, "grad_norm": 0.310546875, "learning_rate": 4.950972310527333e-05, "loss": 2.2829, "step": 2717 }, { "epoch": 0.36, "grad_norm": 0.287109375, "learning_rate": 4.950930588716979e-05, "loss": 2.29, "step": 2718 }, { "epoch": 0.36, "grad_norm": 0.302734375, "learning_rate": 4.9508888493378094e-05, "loss": 2.2982, "step": 2719 }, { "epoch": 0.36, "grad_norm": 0.3046875, "learning_rate": 4.9508470923901235e-05, "loss": 2.2696, "step": 2720 }, { "epoch": 0.36, "grad_norm": 0.306640625, "learning_rate": 4.9508053178742206e-05, "loss": 2.3135, "step": 2721 }, { "epoch": 0.36, "grad_norm": 0.291015625, "learning_rate": 4.950763525790401e-05, "loss": 2.2675, "step": 2722 }, { "epoch": 0.36, "grad_norm": 0.345703125, "learning_rate": 4.950721716138964e-05, "loss": 2.2816, "step": 2723 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.9506798889202076e-05, "loss": 2.2989, "step": 2724 }, { "epoch": 0.36, "grad_norm": 0.310546875, "learning_rate": 4.9506380441344345e-05, "loss": 2.2833, "step": 2725 }, { "epoch": 0.36, "grad_norm": 0.28515625, "learning_rate": 4.950596181781942e-05, "loss": 2.2913, "step": 2726 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.950554301863032e-05, "loss": 2.2828, "step": 2727 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.950512404378004e-05, "loss": 2.2908, "step": 2728 }, { "epoch": 0.36, "grad_norm": 0.310546875, "learning_rate": 4.950470489327158e-05, "loss": 2.2798, "step": 2729 }, { "epoch": 0.36, "grad_norm": 0.31640625, "learning_rate": 4.950428556710795e-05, "loss": 2.2682, "step": 2730 }, { "epoch": 0.36, "grad_norm": 0.302734375, "learning_rate": 4.950386606529215e-05, "loss": 2.2946, "step": 2731 }, { "epoch": 0.36, "grad_norm": 0.29296875, "learning_rate": 4.9503446387827205e-05, "loss": 2.2956, "step": 2732 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.950302653471609e-05, "loss": 2.2869, "step": 2733 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.950260650596185e-05, "loss": 2.2925, "step": 2734 }, { "epoch": 0.36, "grad_norm": 0.296875, "learning_rate": 4.950218630156747e-05, "loss": 2.2859, "step": 2735 }, { "epoch": 0.36, "grad_norm": 0.294921875, "learning_rate": 4.950176592153598e-05, "loss": 2.2772, "step": 2736 }, { "epoch": 0.37, "grad_norm": 0.333984375, "learning_rate": 4.9501345365870375e-05, "loss": 2.3104, "step": 2737 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.9500924634573684e-05, "loss": 2.2708, "step": 2738 }, { "epoch": 0.37, "grad_norm": 0.28515625, "learning_rate": 4.9500503727648925e-05, "loss": 2.2579, "step": 2739 }, { "epoch": 0.37, "grad_norm": 0.29296875, "learning_rate": 4.95000826450991e-05, "loss": 2.2766, "step": 2740 }, { "epoch": 0.37, "grad_norm": 0.30859375, "learning_rate": 4.949966138692724e-05, "loss": 2.2922, "step": 2741 }, { "epoch": 0.37, "grad_norm": 0.29296875, "learning_rate": 4.949923995313637e-05, "loss": 2.2958, "step": 2742 }, { "epoch": 0.37, "grad_norm": 0.291015625, "learning_rate": 4.949881834372949e-05, "loss": 2.282, "step": 2743 }, { "epoch": 0.37, "grad_norm": 0.3125, "learning_rate": 4.9498396558709636e-05, "loss": 2.3051, "step": 2744 }, { "epoch": 0.37, "grad_norm": 0.287109375, "learning_rate": 4.9497974598079835e-05, "loss": 2.2771, "step": 2745 }, { "epoch": 0.37, "grad_norm": 0.275390625, "learning_rate": 4.94975524618431e-05, "loss": 2.2907, "step": 2746 }, { "epoch": 0.37, "grad_norm": 0.302734375, "learning_rate": 4.949713015000247e-05, "loss": 2.3043, "step": 2747 }, { "epoch": 0.37, "grad_norm": 0.296875, "learning_rate": 4.949670766256096e-05, "loss": 2.2798, "step": 2748 }, { "epoch": 0.37, "grad_norm": 0.306640625, "learning_rate": 4.9496284999521604e-05, "loss": 2.283, "step": 2749 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.9495862160887435e-05, "loss": 2.2767, "step": 2750 }, { "epoch": 0.37, "grad_norm": 0.302734375, "learning_rate": 4.9495439146661485e-05, "loss": 2.2551, "step": 2751 }, { "epoch": 0.37, "grad_norm": 0.322265625, "learning_rate": 4.949501595684677e-05, "loss": 2.2842, "step": 2752 }, { "epoch": 0.37, "grad_norm": 0.310546875, "learning_rate": 4.949459259144634e-05, "loss": 2.2985, "step": 2753 }, { "epoch": 0.37, "grad_norm": 0.28515625, "learning_rate": 4.9494169050463234e-05, "loss": 2.2916, "step": 2754 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.949374533390047e-05, "loss": 2.3066, "step": 2755 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.9493321441761096e-05, "loss": 2.2883, "step": 2756 }, { "epoch": 0.37, "grad_norm": 0.314453125, "learning_rate": 4.949289737404815e-05, "loss": 2.2652, "step": 2757 }, { "epoch": 0.37, "grad_norm": 0.2890625, "learning_rate": 4.9492473130764676e-05, "loss": 2.2681, "step": 2758 }, { "epoch": 0.37, "grad_norm": 0.291015625, "learning_rate": 4.949204871191371e-05, "loss": 2.3159, "step": 2759 }, { "epoch": 0.37, "grad_norm": 0.2890625, "learning_rate": 4.949162411749829e-05, "loss": 2.2855, "step": 2760 }, { "epoch": 0.37, "grad_norm": 1.6328125, "learning_rate": 4.949119934752146e-05, "loss": 2.2905, "step": 2761 }, { "epoch": 0.37, "grad_norm": 0.330078125, "learning_rate": 4.949077440198627e-05, "loss": 2.3051, "step": 2762 }, { "epoch": 0.37, "grad_norm": 0.333984375, "learning_rate": 4.949034928089577e-05, "loss": 2.2834, "step": 2763 }, { "epoch": 0.37, "grad_norm": 0.34375, "learning_rate": 4.948992398425301e-05, "loss": 2.2822, "step": 2764 }, { "epoch": 0.37, "grad_norm": 0.326171875, "learning_rate": 4.948949851206102e-05, "loss": 2.2952, "step": 2765 }, { "epoch": 0.37, "grad_norm": 0.326171875, "learning_rate": 4.948907286432286e-05, "loss": 2.3097, "step": 2766 }, { "epoch": 0.37, "grad_norm": 0.341796875, "learning_rate": 4.9488647041041587e-05, "loss": 2.2756, "step": 2767 }, { "epoch": 0.37, "grad_norm": 0.345703125, "learning_rate": 4.9488221042220244e-05, "loss": 2.271, "step": 2768 }, { "epoch": 0.37, "grad_norm": 0.357421875, "learning_rate": 4.9487794867861895e-05, "loss": 2.2825, "step": 2769 }, { "epoch": 0.37, "grad_norm": 0.33984375, "learning_rate": 4.948736851796958e-05, "loss": 2.2717, "step": 2770 }, { "epoch": 0.37, "grad_norm": 0.296875, "learning_rate": 4.948694199254637e-05, "loss": 2.2924, "step": 2771 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.9486515291595316e-05, "loss": 2.2679, "step": 2772 }, { "epoch": 0.37, "grad_norm": 0.341796875, "learning_rate": 4.948608841511948e-05, "loss": 2.2679, "step": 2773 }, { "epoch": 0.37, "grad_norm": 0.33984375, "learning_rate": 4.948566136312191e-05, "loss": 2.2629, "step": 2774 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.9485234135605684e-05, "loss": 2.2821, "step": 2775 }, { "epoch": 0.37, "grad_norm": 0.302734375, "learning_rate": 4.948480673257386e-05, "loss": 2.3343, "step": 2776 }, { "epoch": 0.37, "grad_norm": 0.302734375, "learning_rate": 4.948437915402949e-05, "loss": 2.3082, "step": 2777 }, { "epoch": 0.37, "grad_norm": 0.291015625, "learning_rate": 4.9483951399975655e-05, "loss": 2.2611, "step": 2778 }, { "epoch": 0.37, "grad_norm": 0.318359375, "learning_rate": 4.948352347041541e-05, "loss": 2.2713, "step": 2779 }, { "epoch": 0.37, "grad_norm": 0.31640625, "learning_rate": 4.9483095365351826e-05, "loss": 2.3111, "step": 2780 }, { "epoch": 0.37, "grad_norm": 0.298828125, "learning_rate": 4.948266708478797e-05, "loss": 2.2858, "step": 2781 }, { "epoch": 0.37, "grad_norm": 0.310546875, "learning_rate": 4.948223862872692e-05, "loss": 2.2921, "step": 2782 }, { "epoch": 0.37, "grad_norm": 0.29296875, "learning_rate": 4.9481809997171735e-05, "loss": 2.2705, "step": 2783 }, { "epoch": 0.37, "grad_norm": 0.3125, "learning_rate": 4.9481381190125495e-05, "loss": 2.3058, "step": 2784 }, { "epoch": 0.37, "grad_norm": 0.322265625, "learning_rate": 4.948095220759128e-05, "loss": 2.2659, "step": 2785 }, { "epoch": 0.37, "grad_norm": 0.32421875, "learning_rate": 4.9480523049572154e-05, "loss": 2.2848, "step": 2786 }, { "epoch": 0.37, "grad_norm": 0.279296875, "learning_rate": 4.94800937160712e-05, "loss": 2.273, "step": 2787 }, { "epoch": 0.37, "grad_norm": 0.298828125, "learning_rate": 4.947966420709148e-05, "loss": 2.2829, "step": 2788 }, { "epoch": 0.37, "grad_norm": 0.287109375, "learning_rate": 4.947923452263609e-05, "loss": 2.3083, "step": 2789 }, { "epoch": 0.37, "grad_norm": 0.298828125, "learning_rate": 4.947880466270811e-05, "loss": 2.2588, "step": 2790 }, { "epoch": 0.37, "grad_norm": 0.298828125, "learning_rate": 4.947837462731062e-05, "loss": 2.2859, "step": 2791 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.947794441644669e-05, "loss": 2.289, "step": 2792 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.947751403011942e-05, "loss": 2.2707, "step": 2793 }, { "epoch": 0.37, "grad_norm": 0.283203125, "learning_rate": 4.9477083468331884e-05, "loss": 2.2666, "step": 2794 }, { "epoch": 0.37, "grad_norm": 0.30078125, "learning_rate": 4.9476652731087177e-05, "loss": 2.2809, "step": 2795 }, { "epoch": 0.37, "grad_norm": 0.291015625, "learning_rate": 4.9476221818388377e-05, "loss": 2.2783, "step": 2796 }, { "epoch": 0.37, "grad_norm": 0.3125, "learning_rate": 4.947579073023858e-05, "loss": 2.3309, "step": 2797 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.947535946664088e-05, "loss": 2.2699, "step": 2798 }, { "epoch": 0.37, "grad_norm": 0.296875, "learning_rate": 4.947492802759835e-05, "loss": 2.289, "step": 2799 }, { "epoch": 0.37, "grad_norm": 0.310546875, "learning_rate": 4.9474496413114114e-05, "loss": 2.2716, "step": 2800 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.947406462319123e-05, "loss": 2.2806, "step": 2801 }, { "epoch": 0.37, "grad_norm": 0.29296875, "learning_rate": 4.947363265783282e-05, "loss": 2.3193, "step": 2802 }, { "epoch": 0.37, "grad_norm": 0.294921875, "learning_rate": 4.947320051704197e-05, "loss": 2.266, "step": 2803 }, { "epoch": 0.37, "grad_norm": 0.30859375, "learning_rate": 4.947276820082177e-05, "loss": 2.2793, "step": 2804 }, { "epoch": 0.37, "grad_norm": 0.291015625, "learning_rate": 4.947233570917533e-05, "loss": 2.2907, "step": 2805 }, { "epoch": 0.37, "grad_norm": 0.302734375, "learning_rate": 4.947190304210576e-05, "loss": 2.3173, "step": 2806 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.9471470199616135e-05, "loss": 2.2618, "step": 2807 }, { "epoch": 0.37, "grad_norm": 0.287109375, "learning_rate": 4.9471037181709576e-05, "loss": 2.3018, "step": 2808 }, { "epoch": 0.37, "grad_norm": 0.29296875, "learning_rate": 4.947060398838919e-05, "loss": 2.3169, "step": 2809 }, { "epoch": 0.37, "grad_norm": 0.310546875, "learning_rate": 4.947017061965807e-05, "loss": 2.2572, "step": 2810 }, { "epoch": 0.37, "grad_norm": 0.3046875, "learning_rate": 4.946973707551932e-05, "loss": 2.2911, "step": 2811 }, { "epoch": 0.38, "grad_norm": 0.306640625, "learning_rate": 4.9469303355976064e-05, "loss": 2.3017, "step": 2812 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.9468869461031405e-05, "loss": 2.2871, "step": 2813 }, { "epoch": 0.38, "grad_norm": 0.28125, "learning_rate": 4.9468435390688436e-05, "loss": 2.2787, "step": 2814 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.94680011449503e-05, "loss": 2.2711, "step": 2815 }, { "epoch": 0.38, "grad_norm": 0.291015625, "learning_rate": 4.946756672382008e-05, "loss": 2.285, "step": 2816 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.946713212730091e-05, "loss": 2.2872, "step": 2817 }, { "epoch": 0.38, "grad_norm": 0.294921875, "learning_rate": 4.94666973553959e-05, "loss": 2.2842, "step": 2818 }, { "epoch": 0.38, "grad_norm": 0.306640625, "learning_rate": 4.946626240810815e-05, "loss": 2.296, "step": 2819 }, { "epoch": 0.38, "grad_norm": 0.29296875, "learning_rate": 4.946582728544081e-05, "loss": 2.283, "step": 2820 }, { "epoch": 0.38, "grad_norm": 0.33984375, "learning_rate": 4.946539198739698e-05, "loss": 2.3054, "step": 2821 }, { "epoch": 0.38, "grad_norm": 0.3046875, "learning_rate": 4.946495651397977e-05, "loss": 2.2794, "step": 2822 }, { "epoch": 0.38, "grad_norm": 0.310546875, "learning_rate": 4.946452086519232e-05, "loss": 2.2781, "step": 2823 }, { "epoch": 0.38, "grad_norm": 0.294921875, "learning_rate": 4.946408504103774e-05, "loss": 2.3258, "step": 2824 }, { "epoch": 0.38, "grad_norm": 0.29296875, "learning_rate": 4.946364904151917e-05, "loss": 2.2896, "step": 2825 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.9463212866639724e-05, "loss": 2.3042, "step": 2826 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.946277651640253e-05, "loss": 2.2606, "step": 2827 }, { "epoch": 0.38, "grad_norm": 0.302734375, "learning_rate": 4.9462339990810714e-05, "loss": 2.2928, "step": 2828 }, { "epoch": 0.38, "grad_norm": 0.306640625, "learning_rate": 4.9461903289867404e-05, "loss": 2.254, "step": 2829 }, { "epoch": 0.38, "grad_norm": 0.3125, "learning_rate": 4.9461466413575743e-05, "loss": 2.2973, "step": 2830 }, { "epoch": 0.38, "grad_norm": 0.3203125, "learning_rate": 4.9461029361938844e-05, "loss": 2.2705, "step": 2831 }, { "epoch": 0.38, "grad_norm": 0.306640625, "learning_rate": 4.946059213495985e-05, "loss": 2.2758, "step": 2832 }, { "epoch": 0.38, "grad_norm": 0.28125, "learning_rate": 4.946015473264189e-05, "loss": 2.2566, "step": 2833 }, { "epoch": 0.38, "grad_norm": 0.2890625, "learning_rate": 4.945971715498811e-05, "loss": 2.3222, "step": 2834 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.9459279402001645e-05, "loss": 2.2725, "step": 2835 }, { "epoch": 0.38, "grad_norm": 0.31640625, "learning_rate": 4.9458841473685626e-05, "loss": 2.3007, "step": 2836 }, { "epoch": 0.38, "grad_norm": 0.302734375, "learning_rate": 4.945840337004319e-05, "loss": 2.3114, "step": 2837 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.9457965091077486e-05, "loss": 2.2923, "step": 2838 }, { "epoch": 0.38, "grad_norm": 0.333984375, "learning_rate": 4.945752663679165e-05, "loss": 2.2792, "step": 2839 }, { "epoch": 0.38, "grad_norm": 0.31640625, "learning_rate": 4.945708800718882e-05, "loss": 2.289, "step": 2840 }, { "epoch": 0.38, "grad_norm": 0.33203125, "learning_rate": 4.9456649202272156e-05, "loss": 2.2882, "step": 2841 }, { "epoch": 0.38, "grad_norm": 0.3125, "learning_rate": 4.9456210222044784e-05, "loss": 2.2701, "step": 2842 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.945577106650987e-05, "loss": 2.2584, "step": 2843 }, { "epoch": 0.38, "grad_norm": 0.310546875, "learning_rate": 4.945533173567054e-05, "loss": 2.282, "step": 2844 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.945489222952997e-05, "loss": 2.2832, "step": 2845 }, { "epoch": 0.38, "grad_norm": 0.318359375, "learning_rate": 4.945445254809129e-05, "loss": 2.2612, "step": 2846 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.945401269135766e-05, "loss": 2.2902, "step": 2847 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.945357265933223e-05, "loss": 2.2943, "step": 2848 }, { "epoch": 0.38, "grad_norm": 0.333984375, "learning_rate": 4.945313245201816e-05, "loss": 2.2472, "step": 2849 }, { "epoch": 0.38, "grad_norm": 0.30859375, "learning_rate": 4.945269206941859e-05, "loss": 2.2587, "step": 2850 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.9452251511536695e-05, "loss": 2.2784, "step": 2851 }, { "epoch": 0.38, "grad_norm": 0.291015625, "learning_rate": 4.945181077837563e-05, "loss": 2.3129, "step": 2852 }, { "epoch": 0.38, "grad_norm": 0.279296875, "learning_rate": 4.945136986993854e-05, "loss": 2.3149, "step": 2853 }, { "epoch": 0.38, "grad_norm": 0.310546875, "learning_rate": 4.94509287862286e-05, "loss": 2.3017, "step": 2854 }, { "epoch": 0.38, "grad_norm": 0.30859375, "learning_rate": 4.945048752724897e-05, "loss": 2.2888, "step": 2855 }, { "epoch": 0.38, "grad_norm": 0.330078125, "learning_rate": 4.945004609300281e-05, "loss": 2.247, "step": 2856 }, { "epoch": 0.38, "grad_norm": 0.328125, "learning_rate": 4.944960448349328e-05, "loss": 2.321, "step": 2857 }, { "epoch": 0.38, "grad_norm": 0.310546875, "learning_rate": 4.944916269872355e-05, "loss": 2.2885, "step": 2858 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.944872073869679e-05, "loss": 2.3002, "step": 2859 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.944827860341617e-05, "loss": 2.2695, "step": 2860 }, { "epoch": 0.38, "grad_norm": 0.306640625, "learning_rate": 4.944783629288484e-05, "loss": 2.2787, "step": 2861 }, { "epoch": 0.38, "grad_norm": 0.28125, "learning_rate": 4.9447393807105994e-05, "loss": 2.2917, "step": 2862 }, { "epoch": 0.38, "grad_norm": 0.3125, "learning_rate": 4.94469511460828e-05, "loss": 2.262, "step": 2863 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.944650830981842e-05, "loss": 2.3146, "step": 2864 }, { "epoch": 0.38, "grad_norm": 0.314453125, "learning_rate": 4.944606529831603e-05, "loss": 2.2854, "step": 2865 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.9445622111578817e-05, "loss": 2.2551, "step": 2866 }, { "epoch": 0.38, "grad_norm": 0.29296875, "learning_rate": 4.944517874960995e-05, "loss": 2.2782, "step": 2867 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.94447352124126e-05, "loss": 2.2999, "step": 2868 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.944429149998996e-05, "loss": 2.2914, "step": 2869 }, { "epoch": 0.38, "grad_norm": 0.279296875, "learning_rate": 4.944384761234521e-05, "loss": 2.2944, "step": 2870 }, { "epoch": 0.38, "grad_norm": 0.291015625, "learning_rate": 4.944340354948152e-05, "loss": 2.2861, "step": 2871 }, { "epoch": 0.38, "grad_norm": 0.294921875, "learning_rate": 4.944295931140208e-05, "loss": 2.2996, "step": 2872 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.944251489811007e-05, "loss": 2.278, "step": 2873 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.9442070309608686e-05, "loss": 2.2782, "step": 2874 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.9441625545901106e-05, "loss": 2.3003, "step": 2875 }, { "epoch": 0.38, "grad_norm": 0.29296875, "learning_rate": 4.944118060699052e-05, "loss": 2.2463, "step": 2876 }, { "epoch": 0.38, "grad_norm": 0.30859375, "learning_rate": 4.944073549288012e-05, "loss": 2.2955, "step": 2877 }, { "epoch": 0.38, "grad_norm": 0.32421875, "learning_rate": 4.94402902035731e-05, "loss": 2.2768, "step": 2878 }, { "epoch": 0.38, "grad_norm": 0.294921875, "learning_rate": 4.943984473907264e-05, "loss": 2.2551, "step": 2879 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.9439399099381946e-05, "loss": 2.3044, "step": 2880 }, { "epoch": 0.38, "grad_norm": 0.3046875, "learning_rate": 4.94389532845042e-05, "loss": 2.2841, "step": 2881 }, { "epoch": 0.38, "grad_norm": 0.298828125, "learning_rate": 4.943850729444261e-05, "loss": 2.3395, "step": 2882 }, { "epoch": 0.38, "grad_norm": 0.310546875, "learning_rate": 4.9438061129200365e-05, "loss": 2.3105, "step": 2883 }, { "epoch": 0.38, "grad_norm": 0.296875, "learning_rate": 4.943761478878066e-05, "loss": 2.3047, "step": 2884 }, { "epoch": 0.38, "grad_norm": 0.314453125, "learning_rate": 4.943716827318671e-05, "loss": 2.2816, "step": 2885 }, { "epoch": 0.38, "grad_norm": 0.30078125, "learning_rate": 4.94367215824217e-05, "loss": 2.2864, "step": 2886 }, { "epoch": 0.39, "grad_norm": 0.32421875, "learning_rate": 4.943627471648884e-05, "loss": 2.2983, "step": 2887 }, { "epoch": 0.39, "grad_norm": 0.3046875, "learning_rate": 4.9435827675391335e-05, "loss": 2.2932, "step": 2888 }, { "epoch": 0.39, "grad_norm": 0.291015625, "learning_rate": 4.9435380459132376e-05, "loss": 2.2821, "step": 2889 }, { "epoch": 0.39, "grad_norm": 0.31640625, "learning_rate": 4.943493306771518e-05, "loss": 2.2884, "step": 2890 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.943448550114296e-05, "loss": 2.2582, "step": 2891 }, { "epoch": 0.39, "grad_norm": 0.3046875, "learning_rate": 4.9434037759418904e-05, "loss": 2.3234, "step": 2892 }, { "epoch": 0.39, "grad_norm": 0.30078125, "learning_rate": 4.943358984254624e-05, "loss": 2.3265, "step": 2893 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.943314175052817e-05, "loss": 2.2611, "step": 2894 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.9432693483367916e-05, "loss": 2.2666, "step": 2895 }, { "epoch": 0.39, "grad_norm": 0.28515625, "learning_rate": 4.9432245041068684e-05, "loss": 2.27, "step": 2896 }, { "epoch": 0.39, "grad_norm": 0.29296875, "learning_rate": 4.943179642363368e-05, "loss": 2.2784, "step": 2897 }, { "epoch": 0.39, "grad_norm": 0.3046875, "learning_rate": 4.9431347631066126e-05, "loss": 2.285, "step": 2898 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.943089866336925e-05, "loss": 2.291, "step": 2899 }, { "epoch": 0.39, "grad_norm": 0.29296875, "learning_rate": 4.943044952054626e-05, "loss": 2.2957, "step": 2900 }, { "epoch": 0.39, "grad_norm": 0.291015625, "learning_rate": 4.943000020260038e-05, "loss": 2.2527, "step": 2901 }, { "epoch": 0.39, "grad_norm": 0.30078125, "learning_rate": 4.942955070953482e-05, "loss": 2.2739, "step": 2902 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.942910104135281e-05, "loss": 2.2781, "step": 2903 }, { "epoch": 0.39, "grad_norm": 0.30859375, "learning_rate": 4.942865119805758e-05, "loss": 2.2822, "step": 2904 }, { "epoch": 0.39, "grad_norm": 0.29296875, "learning_rate": 4.9428201179652346e-05, "loss": 2.2703, "step": 2905 }, { "epoch": 0.39, "grad_norm": 0.306640625, "learning_rate": 4.942775098614034e-05, "loss": 2.2546, "step": 2906 }, { "epoch": 0.39, "grad_norm": 0.2890625, "learning_rate": 4.9427300617524776e-05, "loss": 2.3136, "step": 2907 }, { "epoch": 0.39, "grad_norm": 0.34375, "learning_rate": 4.9426850073808895e-05, "loss": 2.301, "step": 2908 }, { "epoch": 0.39, "grad_norm": 0.3125, "learning_rate": 4.942639935499592e-05, "loss": 2.2953, "step": 2909 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.942594846108909e-05, "loss": 2.3397, "step": 2910 }, { "epoch": 0.39, "grad_norm": 0.296875, "learning_rate": 4.942549739209163e-05, "loss": 2.2842, "step": 2911 }, { "epoch": 0.39, "grad_norm": 0.306640625, "learning_rate": 4.942504614800677e-05, "loss": 2.2861, "step": 2912 }, { "epoch": 0.39, "grad_norm": 0.306640625, "learning_rate": 4.942459472883776e-05, "loss": 2.2508, "step": 2913 }, { "epoch": 0.39, "grad_norm": 0.296875, "learning_rate": 4.942414313458782e-05, "loss": 2.2957, "step": 2914 }, { "epoch": 0.39, "grad_norm": 0.314453125, "learning_rate": 4.942369136526019e-05, "loss": 2.3012, "step": 2915 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.9423239420858116e-05, "loss": 2.3046, "step": 2916 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.942278730138483e-05, "loss": 2.2711, "step": 2917 }, { "epoch": 0.39, "grad_norm": 0.2890625, "learning_rate": 4.942233500684357e-05, "loss": 2.2392, "step": 2918 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.94218825372376e-05, "loss": 2.2948, "step": 2919 }, { "epoch": 0.39, "grad_norm": 0.306640625, "learning_rate": 4.942142989257013e-05, "loss": 2.2684, "step": 2920 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.942097707284443e-05, "loss": 2.2466, "step": 2921 }, { "epoch": 0.39, "grad_norm": 0.30859375, "learning_rate": 4.942052407806373e-05, "loss": 2.2735, "step": 2922 }, { "epoch": 0.39, "grad_norm": 0.3203125, "learning_rate": 4.94200709082313e-05, "loss": 2.2723, "step": 2923 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.941961756335036e-05, "loss": 2.3162, "step": 2924 }, { "epoch": 0.39, "grad_norm": 0.30078125, "learning_rate": 4.9419164043424185e-05, "loss": 2.3082, "step": 2925 }, { "epoch": 0.39, "grad_norm": 0.318359375, "learning_rate": 4.9418710348456e-05, "loss": 2.2728, "step": 2926 }, { "epoch": 0.39, "grad_norm": 0.314453125, "learning_rate": 4.9418256478449075e-05, "loss": 2.2788, "step": 2927 }, { "epoch": 0.39, "grad_norm": 0.314453125, "learning_rate": 4.9417802433406666e-05, "loss": 2.2672, "step": 2928 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.9417348213332014e-05, "loss": 2.2691, "step": 2929 }, { "epoch": 0.39, "grad_norm": 0.296875, "learning_rate": 4.941689381822839e-05, "loss": 2.2913, "step": 2930 }, { "epoch": 0.39, "grad_norm": 0.333984375, "learning_rate": 4.941643924809904e-05, "loss": 2.262, "step": 2931 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.9415984502947224e-05, "loss": 2.2896, "step": 2932 }, { "epoch": 0.39, "grad_norm": 0.3125, "learning_rate": 4.941552958277621e-05, "loss": 2.2612, "step": 2933 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.941507448758924e-05, "loss": 2.2823, "step": 2934 }, { "epoch": 0.39, "grad_norm": 0.31640625, "learning_rate": 4.94146192173896e-05, "loss": 2.2787, "step": 2935 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.941416377218054e-05, "loss": 2.3138, "step": 2936 }, { "epoch": 0.39, "grad_norm": 0.31640625, "learning_rate": 4.941370815196532e-05, "loss": 2.3183, "step": 2937 }, { "epoch": 0.39, "grad_norm": 0.31640625, "learning_rate": 4.9413252356747216e-05, "loss": 2.2968, "step": 2938 }, { "epoch": 0.39, "grad_norm": 0.30859375, "learning_rate": 4.9412796386529504e-05, "loss": 2.2467, "step": 2939 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.941234024131542e-05, "loss": 2.2718, "step": 2940 }, { "epoch": 0.39, "grad_norm": 0.2890625, "learning_rate": 4.9411883921108276e-05, "loss": 2.2902, "step": 2941 }, { "epoch": 0.39, "grad_norm": 0.33203125, "learning_rate": 4.941142742591131e-05, "loss": 2.2346, "step": 2942 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.9410970755727806e-05, "loss": 2.285, "step": 2943 }, { "epoch": 0.39, "grad_norm": 0.33984375, "learning_rate": 4.941051391056104e-05, "loss": 2.2992, "step": 2944 }, { "epoch": 0.39, "grad_norm": 0.30078125, "learning_rate": 4.941005689041428e-05, "loss": 2.2942, "step": 2945 }, { "epoch": 0.39, "grad_norm": 0.2890625, "learning_rate": 4.940959969529081e-05, "loss": 2.2688, "step": 2946 }, { "epoch": 0.39, "grad_norm": 0.30859375, "learning_rate": 4.94091423251939e-05, "loss": 2.284, "step": 2947 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.9408684780126834e-05, "loss": 2.2635, "step": 2948 }, { "epoch": 0.39, "grad_norm": 0.3125, "learning_rate": 4.9408227060092894e-05, "loss": 2.2983, "step": 2949 }, { "epoch": 0.39, "grad_norm": 0.326171875, "learning_rate": 4.9407769165095354e-05, "loss": 2.3027, "step": 2950 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.94073110951375e-05, "loss": 2.2952, "step": 2951 }, { "epoch": 0.39, "grad_norm": 0.294921875, "learning_rate": 4.940685285022261e-05, "loss": 2.2937, "step": 2952 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.940639443035397e-05, "loss": 2.2854, "step": 2953 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.940593583553488e-05, "loss": 2.2856, "step": 2954 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.940547706576861e-05, "loss": 2.3056, "step": 2955 }, { "epoch": 0.39, "grad_norm": 0.314453125, "learning_rate": 4.940501812105846e-05, "loss": 2.3124, "step": 2956 }, { "epoch": 0.39, "grad_norm": 0.298828125, "learning_rate": 4.94045590014077e-05, "loss": 2.2615, "step": 2957 }, { "epoch": 0.39, "grad_norm": 0.31640625, "learning_rate": 4.9404099706819654e-05, "loss": 2.2596, "step": 2958 }, { "epoch": 0.39, "grad_norm": 0.310546875, "learning_rate": 4.9403640237297586e-05, "loss": 2.2577, "step": 2959 }, { "epoch": 0.39, "grad_norm": 0.28515625, "learning_rate": 4.9403180592844804e-05, "loss": 2.2794, "step": 2960 }, { "epoch": 0.39, "grad_norm": 0.302734375, "learning_rate": 4.94027207734646e-05, "loss": 2.2878, "step": 2961 }, { "epoch": 0.4, "grad_norm": 0.314453125, "learning_rate": 4.940226077916027e-05, "loss": 2.2678, "step": 2962 }, { "epoch": 0.4, "grad_norm": 0.302734375, "learning_rate": 4.940180060993511e-05, "loss": 2.241, "step": 2963 }, { "epoch": 0.4, "grad_norm": 0.28515625, "learning_rate": 4.940134026579242e-05, "loss": 2.2794, "step": 2964 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.940087974673549e-05, "loss": 2.2505, "step": 2965 }, { "epoch": 0.4, "grad_norm": 0.30078125, "learning_rate": 4.940041905276763e-05, "loss": 2.2806, "step": 2966 }, { "epoch": 0.4, "grad_norm": 0.291015625, "learning_rate": 4.9399958183892154e-05, "loss": 2.3065, "step": 2967 }, { "epoch": 0.4, "grad_norm": 0.318359375, "learning_rate": 4.939949714011235e-05, "loss": 2.2569, "step": 2968 }, { "epoch": 0.4, "grad_norm": 0.310546875, "learning_rate": 4.9399035921431525e-05, "loss": 2.2599, "step": 2969 }, { "epoch": 0.4, "grad_norm": 0.30078125, "learning_rate": 4.939857452785298e-05, "loss": 2.2913, "step": 2970 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.939811295938004e-05, "loss": 2.2655, "step": 2971 }, { "epoch": 0.4, "grad_norm": 0.302734375, "learning_rate": 4.9397651216015996e-05, "loss": 2.256, "step": 2972 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.9397189297764165e-05, "loss": 2.2911, "step": 2973 }, { "epoch": 0.4, "grad_norm": 0.283203125, "learning_rate": 4.939672720462787e-05, "loss": 2.2761, "step": 2974 }, { "epoch": 0.4, "grad_norm": 0.287109375, "learning_rate": 4.939626493661039e-05, "loss": 2.306, "step": 2975 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.939580249371508e-05, "loss": 2.2793, "step": 2976 }, { "epoch": 0.4, "grad_norm": 0.31640625, "learning_rate": 4.939533987594522e-05, "loss": 2.2802, "step": 2977 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.9394877083304146e-05, "loss": 2.2436, "step": 2978 }, { "epoch": 0.4, "grad_norm": 0.2890625, "learning_rate": 4.939441411579517e-05, "loss": 2.2715, "step": 2979 }, { "epoch": 0.4, "grad_norm": 0.294921875, "learning_rate": 4.939395097342162e-05, "loss": 2.2881, "step": 2980 }, { "epoch": 0.4, "grad_norm": 0.275390625, "learning_rate": 4.9393487656186794e-05, "loss": 2.2902, "step": 2981 }, { "epoch": 0.4, "grad_norm": 0.28125, "learning_rate": 4.939302416409403e-05, "loss": 2.2959, "step": 2982 }, { "epoch": 0.4, "grad_norm": 0.28515625, "learning_rate": 4.9392560497146645e-05, "loss": 2.2547, "step": 2983 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.939209665534797e-05, "loss": 2.3035, "step": 2984 }, { "epoch": 0.4, "grad_norm": 0.2890625, "learning_rate": 4.9391632638701315e-05, "loss": 2.3035, "step": 2985 }, { "epoch": 0.4, "grad_norm": 0.298828125, "learning_rate": 4.9391168447210026e-05, "loss": 2.2911, "step": 2986 }, { "epoch": 0.4, "grad_norm": 0.302734375, "learning_rate": 4.9390704080877415e-05, "loss": 2.2874, "step": 2987 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.939023953970681e-05, "loss": 2.2711, "step": 2988 }, { "epoch": 0.4, "grad_norm": 0.298828125, "learning_rate": 4.938977482370155e-05, "loss": 2.3175, "step": 2989 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.938930993286497e-05, "loss": 2.2731, "step": 2990 }, { "epoch": 0.4, "grad_norm": 0.322265625, "learning_rate": 4.938884486720038e-05, "loss": 2.2863, "step": 2991 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.938837962671114e-05, "loss": 2.2825, "step": 2992 }, { "epoch": 0.4, "grad_norm": 0.3125, "learning_rate": 4.9387914211400565e-05, "loss": 2.2974, "step": 2993 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.938744862127201e-05, "loss": 2.2699, "step": 2994 }, { "epoch": 0.4, "grad_norm": 0.298828125, "learning_rate": 4.938698285632879e-05, "loss": 2.3443, "step": 2995 }, { "epoch": 0.4, "grad_norm": 0.298828125, "learning_rate": 4.938651691657427e-05, "loss": 2.3055, "step": 2996 }, { "epoch": 0.4, "grad_norm": 0.291015625, "learning_rate": 4.9386050802011765e-05, "loss": 2.2767, "step": 2997 }, { "epoch": 0.4, "grad_norm": 0.3203125, "learning_rate": 4.9385584512644634e-05, "loss": 2.2869, "step": 2998 }, { "epoch": 0.4, "grad_norm": 0.291015625, "learning_rate": 4.938511804847621e-05, "loss": 2.2856, "step": 2999 }, { "epoch": 0.4, "grad_norm": 0.298828125, "learning_rate": 4.9384651409509836e-05, "loss": 2.2664, "step": 3000 }, { "epoch": 0.4, "eval_loss": 2.2794189453125, "eval_runtime": 615.7859, "eval_samples_per_second": 62.962, "eval_steps_per_second": 7.871, "step": 3000 }, { "epoch": 0.4, "grad_norm": 0.3203125, "learning_rate": 4.9384184595748863e-05, "loss": 2.2905, "step": 3001 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.938371760719663e-05, "loss": 2.2822, "step": 3002 }, { "epoch": 0.4, "grad_norm": 0.310546875, "learning_rate": 4.9383250443856495e-05, "loss": 2.263, "step": 3003 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.93827831057318e-05, "loss": 2.2953, "step": 3004 }, { "epoch": 0.4, "grad_norm": 0.30078125, "learning_rate": 4.938231559282589e-05, "loss": 2.2879, "step": 3005 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.938184790514213e-05, "loss": 2.2827, "step": 3006 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.9381380042683854e-05, "loss": 2.2453, "step": 3007 }, { "epoch": 0.4, "grad_norm": 0.287109375, "learning_rate": 4.938091200545444e-05, "loss": 2.2578, "step": 3008 }, { "epoch": 0.4, "grad_norm": 0.30859375, "learning_rate": 4.9380443793457216e-05, "loss": 2.2922, "step": 3009 }, { "epoch": 0.4, "grad_norm": 0.2890625, "learning_rate": 4.9379975406695554e-05, "loss": 2.2745, "step": 3010 }, { "epoch": 0.4, "grad_norm": 0.30078125, "learning_rate": 4.9379506845172816e-05, "loss": 2.3028, "step": 3011 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.9379038108892353e-05, "loss": 2.2684, "step": 3012 }, { "epoch": 0.4, "grad_norm": 0.30859375, "learning_rate": 4.9378569197857515e-05, "loss": 2.2752, "step": 3013 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.937810011207168e-05, "loss": 2.2905, "step": 3014 }, { "epoch": 0.4, "grad_norm": 0.318359375, "learning_rate": 4.93776308515382e-05, "loss": 2.2575, "step": 3015 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.9377161416260456e-05, "loss": 2.2919, "step": 3016 }, { "epoch": 0.4, "grad_norm": 0.306640625, "learning_rate": 4.9376691806241785e-05, "loss": 2.274, "step": 3017 }, { "epoch": 0.4, "grad_norm": 0.302734375, "learning_rate": 4.937622202148558e-05, "loss": 2.3084, "step": 3018 }, { "epoch": 0.4, "grad_norm": 0.302734375, "learning_rate": 4.937575206199519e-05, "loss": 2.2788, "step": 3019 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.9375281927774e-05, "loss": 2.284, "step": 3020 }, { "epoch": 0.4, "grad_norm": 0.287109375, "learning_rate": 4.9374811618825355e-05, "loss": 2.2516, "step": 3021 }, { "epoch": 0.4, "grad_norm": 0.3046875, "learning_rate": 4.937434113515265e-05, "loss": 2.2769, "step": 3022 }, { "epoch": 0.4, "grad_norm": 0.3125, "learning_rate": 4.937387047675925e-05, "loss": 2.2562, "step": 3023 }, { "epoch": 0.4, "grad_norm": 0.31640625, "learning_rate": 4.9373399643648534e-05, "loss": 2.2731, "step": 3024 }, { "epoch": 0.4, "grad_norm": 0.294921875, "learning_rate": 4.9372928635823865e-05, "loss": 2.2895, "step": 3025 }, { "epoch": 0.4, "grad_norm": 0.27734375, "learning_rate": 4.937245745328862e-05, "loss": 2.3042, "step": 3026 }, { "epoch": 0.4, "grad_norm": 0.30078125, "learning_rate": 4.9371986096046193e-05, "loss": 2.29, "step": 3027 }, { "epoch": 0.4, "grad_norm": 0.29296875, "learning_rate": 4.9371514564099946e-05, "loss": 2.2758, "step": 3028 }, { "epoch": 0.4, "grad_norm": 0.28125, "learning_rate": 4.937104285745326e-05, "loss": 2.2954, "step": 3029 }, { "epoch": 0.4, "grad_norm": 0.337890625, "learning_rate": 4.9370570976109536e-05, "loss": 2.2632, "step": 3030 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.9370098920072136e-05, "loss": 2.2747, "step": 3031 }, { "epoch": 0.4, "grad_norm": 0.2890625, "learning_rate": 4.936962668934444e-05, "loss": 2.2368, "step": 3032 }, { "epoch": 0.4, "grad_norm": 0.34765625, "learning_rate": 4.936915428392985e-05, "loss": 2.3123, "step": 3033 }, { "epoch": 0.4, "grad_norm": 0.28125, "learning_rate": 4.9368681703831757e-05, "loss": 2.2779, "step": 3034 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.936820894905352e-05, "loss": 2.2992, "step": 3035 }, { "epoch": 0.4, "grad_norm": 0.296875, "learning_rate": 4.936773601959855e-05, "loss": 2.2753, "step": 3036 }, { "epoch": 0.41, "grad_norm": 0.2890625, "learning_rate": 4.936726291547023e-05, "loss": 2.2943, "step": 3037 }, { "epoch": 0.41, "grad_norm": 0.287109375, "learning_rate": 4.936678963667195e-05, "loss": 2.2862, "step": 3038 }, { "epoch": 0.41, "grad_norm": 0.3203125, "learning_rate": 4.9366316183207107e-05, "loss": 2.2654, "step": 3039 }, { "epoch": 0.41, "grad_norm": 0.306640625, "learning_rate": 4.93658425550791e-05, "loss": 2.2604, "step": 3040 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.936536875229131e-05, "loss": 2.3053, "step": 3041 }, { "epoch": 0.41, "grad_norm": 0.29296875, "learning_rate": 4.936489477484714e-05, "loss": 2.2433, "step": 3042 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.936442062275e-05, "loss": 2.2815, "step": 3043 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.936394629600326e-05, "loss": 2.2917, "step": 3044 }, { "epoch": 0.41, "grad_norm": 0.29296875, "learning_rate": 4.936347179461035e-05, "loss": 2.2826, "step": 3045 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.936299711857465e-05, "loss": 2.3045, "step": 3046 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.9362522267899574e-05, "loss": 2.2762, "step": 3047 }, { "epoch": 0.41, "grad_norm": 0.3125, "learning_rate": 4.936204724258853e-05, "loss": 2.3017, "step": 3048 }, { "epoch": 0.41, "grad_norm": 0.2734375, "learning_rate": 4.93615720426449e-05, "loss": 2.286, "step": 3049 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.936109666807212e-05, "loss": 2.2521, "step": 3050 }, { "epoch": 0.41, "grad_norm": 0.3046875, "learning_rate": 4.936062111887357e-05, "loss": 2.273, "step": 3051 }, { "epoch": 0.41, "grad_norm": 0.28125, "learning_rate": 4.936014539505269e-05, "loss": 2.2961, "step": 3052 }, { "epoch": 0.41, "grad_norm": 0.28125, "learning_rate": 4.935966949661286e-05, "loss": 2.2657, "step": 3053 }, { "epoch": 0.41, "grad_norm": 0.275390625, "learning_rate": 4.935919342355751e-05, "loss": 2.2925, "step": 3054 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.935871717589004e-05, "loss": 2.2731, "step": 3055 }, { "epoch": 0.41, "grad_norm": 0.287109375, "learning_rate": 4.935824075361388e-05, "loss": 2.2621, "step": 3056 }, { "epoch": 0.41, "grad_norm": 0.3125, "learning_rate": 4.9357764156732434e-05, "loss": 2.2999, "step": 3057 }, { "epoch": 0.41, "grad_norm": 0.283203125, "learning_rate": 4.935728738524912e-05, "loss": 2.3159, "step": 3058 }, { "epoch": 0.41, "grad_norm": 0.302734375, "learning_rate": 4.935681043916735e-05, "loss": 2.3007, "step": 3059 }, { "epoch": 0.41, "grad_norm": 0.283203125, "learning_rate": 4.935633331849055e-05, "loss": 2.2778, "step": 3060 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.935585602322214e-05, "loss": 2.2794, "step": 3061 }, { "epoch": 0.41, "grad_norm": 0.302734375, "learning_rate": 4.935537855336554e-05, "loss": 2.3022, "step": 3062 }, { "epoch": 0.41, "grad_norm": 0.30859375, "learning_rate": 4.935490090892417e-05, "loss": 2.2806, "step": 3063 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.935442308990146e-05, "loss": 2.2554, "step": 3064 }, { "epoch": 0.41, "grad_norm": 0.2890625, "learning_rate": 4.9353945096300834e-05, "loss": 2.2929, "step": 3065 }, { "epoch": 0.41, "grad_norm": 0.2890625, "learning_rate": 4.9353466928125715e-05, "loss": 2.2768, "step": 3066 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.9352988585379524e-05, "loss": 2.283, "step": 3067 }, { "epoch": 0.41, "grad_norm": 0.2890625, "learning_rate": 4.9352510068065704e-05, "loss": 2.2727, "step": 3068 }, { "epoch": 0.41, "grad_norm": 0.28515625, "learning_rate": 4.935203137618768e-05, "loss": 2.2684, "step": 3069 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.935155250974887e-05, "loss": 2.299, "step": 3070 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.935107346875273e-05, "loss": 2.2907, "step": 3071 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.9350594253202676e-05, "loss": 2.2879, "step": 3072 }, { "epoch": 0.41, "grad_norm": 0.330078125, "learning_rate": 4.935011486310216e-05, "loss": 2.2947, "step": 3073 }, { "epoch": 0.41, "grad_norm": 0.302734375, "learning_rate": 4.9349635298454595e-05, "loss": 2.3214, "step": 3074 }, { "epoch": 0.41, "grad_norm": 0.287109375, "learning_rate": 4.934915555926344e-05, "loss": 2.2778, "step": 3075 }, { "epoch": 0.41, "grad_norm": 0.306640625, "learning_rate": 4.934867564553212e-05, "loss": 2.2676, "step": 3076 }, { "epoch": 0.41, "grad_norm": 0.31640625, "learning_rate": 4.934819555726408e-05, "loss": 2.2655, "step": 3077 }, { "epoch": 0.41, "grad_norm": 0.314453125, "learning_rate": 4.934771529446277e-05, "loss": 2.3088, "step": 3078 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.934723485713162e-05, "loss": 2.302, "step": 3079 }, { "epoch": 0.41, "grad_norm": 0.32421875, "learning_rate": 4.934675424527407e-05, "loss": 2.2533, "step": 3080 }, { "epoch": 0.41, "grad_norm": 0.294921875, "learning_rate": 4.934627345889358e-05, "loss": 2.3096, "step": 3081 }, { "epoch": 0.41, "grad_norm": 0.298828125, "learning_rate": 4.934579249799359e-05, "loss": 2.3006, "step": 3082 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.934531136257755e-05, "loss": 2.2558, "step": 3083 }, { "epoch": 0.41, "grad_norm": 0.271484375, "learning_rate": 4.93448300526489e-05, "loss": 2.2874, "step": 3084 }, { "epoch": 0.41, "grad_norm": 0.30859375, "learning_rate": 4.9344348568211106e-05, "loss": 2.2819, "step": 3085 }, { "epoch": 0.41, "grad_norm": 0.30859375, "learning_rate": 4.93438669092676e-05, "loss": 2.2839, "step": 3086 }, { "epoch": 0.41, "grad_norm": 0.6171875, "learning_rate": 4.9343385075821854e-05, "loss": 2.297, "step": 3087 }, { "epoch": 0.41, "grad_norm": 0.296875, "learning_rate": 4.934290306787731e-05, "loss": 2.2788, "step": 3088 }, { "epoch": 0.41, "grad_norm": 0.314453125, "learning_rate": 4.9342420885437426e-05, "loss": 2.2711, "step": 3089 }, { "epoch": 0.41, "grad_norm": 0.3046875, "learning_rate": 4.934193852850565e-05, "loss": 2.2807, "step": 3090 }, { "epoch": 0.41, "grad_norm": 0.298828125, "learning_rate": 4.9341455997085456e-05, "loss": 2.2481, "step": 3091 }, { "epoch": 0.41, "grad_norm": 0.28515625, "learning_rate": 4.9340973291180295e-05, "loss": 2.302, "step": 3092 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.9340490410793624e-05, "loss": 2.2732, "step": 3093 }, { "epoch": 0.41, "grad_norm": 0.296875, "learning_rate": 4.93400073559289e-05, "loss": 2.3099, "step": 3094 }, { "epoch": 0.41, "grad_norm": 0.28515625, "learning_rate": 4.93395241265896e-05, "loss": 2.2959, "step": 3095 }, { "epoch": 0.41, "grad_norm": 0.3203125, "learning_rate": 4.933904072277918e-05, "loss": 2.295, "step": 3096 }, { "epoch": 0.41, "grad_norm": 0.30078125, "learning_rate": 4.933855714450111e-05, "loss": 2.2809, "step": 3097 }, { "epoch": 0.41, "grad_norm": 0.28515625, "learning_rate": 4.9338073391758844e-05, "loss": 2.2769, "step": 3098 }, { "epoch": 0.41, "grad_norm": 0.310546875, "learning_rate": 4.933758946455586e-05, "loss": 2.2997, "step": 3099 }, { "epoch": 0.41, "grad_norm": 0.30859375, "learning_rate": 4.933710536289563e-05, "loss": 2.2863, "step": 3100 }, { "epoch": 0.41, "grad_norm": 0.310546875, "learning_rate": 4.933662108678161e-05, "loss": 2.2875, "step": 3101 }, { "epoch": 0.41, "grad_norm": 0.287109375, "learning_rate": 4.933613663621729e-05, "loss": 2.3044, "step": 3102 }, { "epoch": 0.41, "grad_norm": 0.3203125, "learning_rate": 4.933565201120612e-05, "loss": 2.282, "step": 3103 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.93351672117516e-05, "loss": 2.2842, "step": 3104 }, { "epoch": 0.41, "grad_norm": 0.287109375, "learning_rate": 4.933468223785719e-05, "loss": 2.2664, "step": 3105 }, { "epoch": 0.41, "grad_norm": 0.298828125, "learning_rate": 4.933419708952636e-05, "loss": 2.284, "step": 3106 }, { "epoch": 0.41, "grad_norm": 0.291015625, "learning_rate": 4.9333711766762605e-05, "loss": 2.2527, "step": 3107 }, { "epoch": 0.41, "grad_norm": 0.29296875, "learning_rate": 4.933322626956939e-05, "loss": 2.2973, "step": 3108 }, { "epoch": 0.41, "grad_norm": 0.283203125, "learning_rate": 4.93327405979502e-05, "loss": 2.3039, "step": 3109 }, { "epoch": 0.41, "grad_norm": 0.3046875, "learning_rate": 4.9332254751908514e-05, "loss": 2.2659, "step": 3110 }, { "epoch": 0.41, "grad_norm": 0.28125, "learning_rate": 4.933176873144783e-05, "loss": 2.2769, "step": 3111 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.93312825365716e-05, "loss": 2.2919, "step": 3112 }, { "epoch": 0.42, "grad_norm": 0.310546875, "learning_rate": 4.9330796167283336e-05, "loss": 2.3017, "step": 3113 }, { "epoch": 0.42, "grad_norm": 0.298828125, "learning_rate": 4.9330309623586525e-05, "loss": 2.296, "step": 3114 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.9329822905484634e-05, "loss": 2.2732, "step": 3115 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.932933601298117e-05, "loss": 2.2946, "step": 3116 }, { "epoch": 0.42, "grad_norm": 0.2734375, "learning_rate": 4.932884894607962e-05, "loss": 2.2783, "step": 3117 }, { "epoch": 0.42, "grad_norm": 0.28125, "learning_rate": 4.932836170478347e-05, "loss": 2.2808, "step": 3118 }, { "epoch": 0.42, "grad_norm": 0.283203125, "learning_rate": 4.932787428909621e-05, "loss": 2.2856, "step": 3119 }, { "epoch": 0.42, "grad_norm": 0.296875, "learning_rate": 4.9327386699021346e-05, "loss": 2.3043, "step": 3120 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.932689893456236e-05, "loss": 2.281, "step": 3121 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.932641099572276e-05, "loss": 2.2568, "step": 3122 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.932592288250604e-05, "loss": 2.2807, "step": 3123 }, { "epoch": 0.42, "grad_norm": 0.3046875, "learning_rate": 4.932543459491569e-05, "loss": 2.3043, "step": 3124 }, { "epoch": 0.42, "grad_norm": 0.3046875, "learning_rate": 4.932494613295522e-05, "loss": 2.2854, "step": 3125 }, { "epoch": 0.42, "grad_norm": 0.2890625, "learning_rate": 4.932445749662814e-05, "loss": 2.294, "step": 3126 }, { "epoch": 0.42, "grad_norm": 0.298828125, "learning_rate": 4.932396868593793e-05, "loss": 2.2775, "step": 3127 }, { "epoch": 0.42, "grad_norm": 0.279296875, "learning_rate": 4.932347970088811e-05, "loss": 2.2828, "step": 3128 }, { "epoch": 0.42, "grad_norm": 0.31640625, "learning_rate": 4.932299054148217e-05, "loss": 2.2735, "step": 3129 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.932250120772364e-05, "loss": 2.2837, "step": 3130 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.9322011699616014e-05, "loss": 2.2728, "step": 3131 }, { "epoch": 0.42, "grad_norm": 0.279296875, "learning_rate": 4.932152201716279e-05, "loss": 2.2991, "step": 3132 }, { "epoch": 0.42, "grad_norm": 0.3125, "learning_rate": 4.93210321603675e-05, "loss": 2.2688, "step": 3133 }, { "epoch": 0.42, "grad_norm": 0.302734375, "learning_rate": 4.9320542129233644e-05, "loss": 2.3076, "step": 3134 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.932005192376473e-05, "loss": 2.2551, "step": 3135 }, { "epoch": 0.42, "grad_norm": 0.3125, "learning_rate": 4.931956154396429e-05, "loss": 2.2439, "step": 3136 }, { "epoch": 0.42, "grad_norm": 0.283203125, "learning_rate": 4.9319070989835816e-05, "loss": 2.2831, "step": 3137 }, { "epoch": 0.42, "grad_norm": 0.3125, "learning_rate": 4.931858026138283e-05, "loss": 2.2743, "step": 3138 }, { "epoch": 0.42, "grad_norm": 0.287109375, "learning_rate": 4.931808935860887e-05, "loss": 2.2981, "step": 3139 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.931759828151743e-05, "loss": 2.33, "step": 3140 }, { "epoch": 0.42, "grad_norm": 0.279296875, "learning_rate": 4.931710703011204e-05, "loss": 2.2653, "step": 3141 }, { "epoch": 0.42, "grad_norm": 0.28515625, "learning_rate": 4.931661560439623e-05, "loss": 2.2825, "step": 3142 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.93161240043735e-05, "loss": 2.2822, "step": 3143 }, { "epoch": 0.42, "grad_norm": 0.294921875, "learning_rate": 4.9315632230047406e-05, "loss": 2.2761, "step": 3144 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.931514028142143e-05, "loss": 2.2902, "step": 3145 }, { "epoch": 0.42, "grad_norm": 0.28515625, "learning_rate": 4.9314648158499145e-05, "loss": 2.2675, "step": 3146 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.931415586128405e-05, "loss": 2.2797, "step": 3147 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.931366338977969e-05, "loss": 2.2993, "step": 3148 }, { "epoch": 0.42, "grad_norm": 0.30859375, "learning_rate": 4.9313170743989565e-05, "loss": 2.2782, "step": 3149 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.9312677923917244e-05, "loss": 2.2574, "step": 3150 }, { "epoch": 0.42, "grad_norm": 0.287109375, "learning_rate": 4.931218492956624e-05, "loss": 2.2523, "step": 3151 }, { "epoch": 0.42, "grad_norm": 0.283203125, "learning_rate": 4.9311691760940084e-05, "loss": 2.2623, "step": 3152 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.931119841804233e-05, "loss": 2.2628, "step": 3153 }, { "epoch": 0.42, "grad_norm": 0.28515625, "learning_rate": 4.9310704900876494e-05, "loss": 2.2932, "step": 3154 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.931021120944612e-05, "loss": 2.272, "step": 3155 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.930971734375475e-05, "loss": 2.2558, "step": 3156 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.930922330380592e-05, "loss": 2.2475, "step": 3157 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.930872908960317e-05, "loss": 2.2879, "step": 3158 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.930823470115005e-05, "loss": 2.3118, "step": 3159 }, { "epoch": 0.42, "grad_norm": 0.294921875, "learning_rate": 4.93077401384501e-05, "loss": 2.2843, "step": 3160 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.930724540150687e-05, "loss": 2.2679, "step": 3161 }, { "epoch": 0.42, "grad_norm": 0.3046875, "learning_rate": 4.93067504903239e-05, "loss": 2.2952, "step": 3162 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.930625540490473e-05, "loss": 2.2555, "step": 3163 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.930576014525293e-05, "loss": 2.2588, "step": 3164 }, { "epoch": 0.42, "grad_norm": 0.287109375, "learning_rate": 4.9305264711372034e-05, "loss": 2.2845, "step": 3165 }, { "epoch": 0.42, "grad_norm": 0.298828125, "learning_rate": 4.93047691032656e-05, "loss": 2.2767, "step": 3166 }, { "epoch": 0.42, "grad_norm": 0.3046875, "learning_rate": 4.930427332093717e-05, "loss": 2.2954, "step": 3167 }, { "epoch": 0.42, "grad_norm": 0.2890625, "learning_rate": 4.930377736439031e-05, "loss": 2.2739, "step": 3168 }, { "epoch": 0.42, "grad_norm": 0.328125, "learning_rate": 4.930328123362858e-05, "loss": 2.311, "step": 3169 }, { "epoch": 0.42, "grad_norm": 0.3203125, "learning_rate": 4.9302784928655523e-05, "loss": 2.247, "step": 3170 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.93022884494747e-05, "loss": 2.305, "step": 3171 }, { "epoch": 0.42, "grad_norm": 0.3046875, "learning_rate": 4.930179179608967e-05, "loss": 2.3192, "step": 3172 }, { "epoch": 0.42, "grad_norm": 0.306640625, "learning_rate": 4.930129496850399e-05, "loss": 2.3059, "step": 3173 }, { "epoch": 0.42, "grad_norm": 0.29296875, "learning_rate": 4.9300797966721236e-05, "loss": 2.2852, "step": 3174 }, { "epoch": 0.42, "grad_norm": 0.26953125, "learning_rate": 4.9300300790744944e-05, "loss": 2.2793, "step": 3175 }, { "epoch": 0.42, "grad_norm": 0.314453125, "learning_rate": 4.929980344057871e-05, "loss": 2.267, "step": 3176 }, { "epoch": 0.42, "grad_norm": 0.298828125, "learning_rate": 4.929930591622608e-05, "loss": 2.2933, "step": 3177 }, { "epoch": 0.42, "grad_norm": 0.291015625, "learning_rate": 4.929880821769062e-05, "loss": 2.2572, "step": 3178 }, { "epoch": 0.42, "grad_norm": 0.30078125, "learning_rate": 4.929831034497591e-05, "loss": 2.2335, "step": 3179 }, { "epoch": 0.42, "grad_norm": 0.31640625, "learning_rate": 4.92978122980855e-05, "loss": 2.2742, "step": 3180 }, { "epoch": 0.42, "grad_norm": 0.294921875, "learning_rate": 4.9297314077022974e-05, "loss": 2.3103, "step": 3181 }, { "epoch": 0.42, "grad_norm": 0.2890625, "learning_rate": 4.9296815681791896e-05, "loss": 2.3016, "step": 3182 }, { "epoch": 0.42, "grad_norm": 0.27734375, "learning_rate": 4.929631711239585e-05, "loss": 2.2724, "step": 3183 }, { "epoch": 0.42, "grad_norm": 0.302734375, "learning_rate": 4.9295818368838395e-05, "loss": 2.2869, "step": 3184 }, { "epoch": 0.42, "grad_norm": 0.294921875, "learning_rate": 4.9295319451123115e-05, "loss": 2.313, "step": 3185 }, { "epoch": 0.42, "grad_norm": 0.294921875, "learning_rate": 4.9294820359253584e-05, "loss": 2.2646, "step": 3186 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.929432109323338e-05, "loss": 2.2982, "step": 3187 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.9293821653066085e-05, "loss": 2.3114, "step": 3188 }, { "epoch": 0.43, "grad_norm": 0.2890625, "learning_rate": 4.929332203875527e-05, "loss": 2.275, "step": 3189 }, { "epoch": 0.43, "grad_norm": 0.30859375, "learning_rate": 4.9292822250304535e-05, "loss": 2.2875, "step": 3190 }, { "epoch": 0.43, "grad_norm": 0.318359375, "learning_rate": 4.9292322287717444e-05, "loss": 2.2849, "step": 3191 }, { "epoch": 0.43, "grad_norm": 0.294921875, "learning_rate": 4.929182215099758e-05, "loss": 2.2616, "step": 3192 }, { "epoch": 0.43, "grad_norm": 0.2890625, "learning_rate": 4.929132184014854e-05, "loss": 2.2936, "step": 3193 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.92908213551739e-05, "loss": 2.2941, "step": 3194 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.929032069607726e-05, "loss": 2.2787, "step": 3195 }, { "epoch": 0.43, "grad_norm": 0.294921875, "learning_rate": 4.92898198628622e-05, "loss": 2.2758, "step": 3196 }, { "epoch": 0.43, "grad_norm": 0.291015625, "learning_rate": 4.928931885553231e-05, "loss": 2.2804, "step": 3197 }, { "epoch": 0.43, "grad_norm": 0.287109375, "learning_rate": 4.928881767409119e-05, "loss": 2.2749, "step": 3198 }, { "epoch": 0.43, "grad_norm": 0.287109375, "learning_rate": 4.928831631854242e-05, "loss": 2.3171, "step": 3199 }, { "epoch": 0.43, "grad_norm": 0.279296875, "learning_rate": 4.9287814788889595e-05, "loss": 2.2706, "step": 3200 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.928731308513632e-05, "loss": 2.2953, "step": 3201 }, { "epoch": 0.43, "grad_norm": 0.29296875, "learning_rate": 4.928681120728619e-05, "loss": 2.286, "step": 3202 }, { "epoch": 0.43, "grad_norm": 0.294921875, "learning_rate": 4.9286309155342794e-05, "loss": 2.2564, "step": 3203 }, { "epoch": 0.43, "grad_norm": 0.310546875, "learning_rate": 4.928580692930973e-05, "loss": 2.2839, "step": 3204 }, { "epoch": 0.43, "grad_norm": 0.31640625, "learning_rate": 4.9285304529190615e-05, "loss": 2.2831, "step": 3205 }, { "epoch": 0.43, "grad_norm": 0.30859375, "learning_rate": 4.9284801954989027e-05, "loss": 2.2993, "step": 3206 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.928429920670858e-05, "loss": 2.2473, "step": 3207 }, { "epoch": 0.43, "grad_norm": 0.30078125, "learning_rate": 4.928379628435289e-05, "loss": 2.276, "step": 3208 }, { "epoch": 0.43, "grad_norm": 0.294921875, "learning_rate": 4.928329318792554e-05, "loss": 2.283, "step": 3209 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.928278991743015e-05, "loss": 2.3111, "step": 3210 }, { "epoch": 0.43, "grad_norm": 0.3125, "learning_rate": 4.928228647287033e-05, "loss": 2.3077, "step": 3211 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.928178285424967e-05, "loss": 2.265, "step": 3212 }, { "epoch": 0.43, "grad_norm": 0.291015625, "learning_rate": 4.928127906157181e-05, "loss": 2.303, "step": 3213 }, { "epoch": 0.43, "grad_norm": 0.30859375, "learning_rate": 4.928077509484032e-05, "loss": 2.2742, "step": 3214 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.928027095405886e-05, "loss": 2.268, "step": 3215 }, { "epoch": 0.43, "grad_norm": 0.318359375, "learning_rate": 4.927976663923101e-05, "loss": 2.2403, "step": 3216 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.927926215036039e-05, "loss": 2.2772, "step": 3217 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.927875748745063e-05, "loss": 2.2765, "step": 3218 }, { "epoch": 0.43, "grad_norm": 0.291015625, "learning_rate": 4.927825265050534e-05, "loss": 2.2879, "step": 3219 }, { "epoch": 0.43, "grad_norm": 0.30859375, "learning_rate": 4.9277747639528137e-05, "loss": 2.2589, "step": 3220 }, { "epoch": 0.43, "grad_norm": 0.31640625, "learning_rate": 4.927724245452264e-05, "loss": 2.2695, "step": 3221 }, { "epoch": 0.43, "grad_norm": 0.318359375, "learning_rate": 4.927673709549248e-05, "loss": 2.2825, "step": 3222 }, { "epoch": 0.43, "grad_norm": 0.287109375, "learning_rate": 4.927623156244126e-05, "loss": 2.2993, "step": 3223 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.9275725855372625e-05, "loss": 2.2569, "step": 3224 }, { "epoch": 0.43, "grad_norm": 0.28515625, "learning_rate": 4.9275219974290185e-05, "loss": 2.2592, "step": 3225 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.927471391919757e-05, "loss": 2.3157, "step": 3226 }, { "epoch": 0.43, "grad_norm": 0.3046875, "learning_rate": 4.927420769009842e-05, "loss": 2.2884, "step": 3227 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.927370128699634e-05, "loss": 2.3219, "step": 3228 }, { "epoch": 0.43, "grad_norm": 0.294921875, "learning_rate": 4.927319470989499e-05, "loss": 2.2889, "step": 3229 }, { "epoch": 0.43, "grad_norm": 0.287109375, "learning_rate": 4.927268795879797e-05, "loss": 2.2627, "step": 3230 }, { "epoch": 0.43, "grad_norm": 0.28515625, "learning_rate": 4.927218103370893e-05, "loss": 2.2819, "step": 3231 }, { "epoch": 0.43, "grad_norm": 0.3125, "learning_rate": 4.92716739346315e-05, "loss": 2.2793, "step": 3232 }, { "epoch": 0.43, "grad_norm": 0.30078125, "learning_rate": 4.927116666156932e-05, "loss": 2.2868, "step": 3233 }, { "epoch": 0.43, "grad_norm": 0.291015625, "learning_rate": 4.9270659214526026e-05, "loss": 2.3089, "step": 3234 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.927015159350524e-05, "loss": 2.2728, "step": 3235 }, { "epoch": 0.43, "grad_norm": 0.28125, "learning_rate": 4.926964379851062e-05, "loss": 2.2828, "step": 3236 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.92691358295458e-05, "loss": 2.2562, "step": 3237 }, { "epoch": 0.43, "grad_norm": 0.279296875, "learning_rate": 4.926862768661442e-05, "loss": 2.2794, "step": 3238 }, { "epoch": 0.43, "grad_norm": 0.28515625, "learning_rate": 4.926811936972012e-05, "loss": 2.298, "step": 3239 }, { "epoch": 0.43, "grad_norm": 0.314453125, "learning_rate": 4.926761087886654e-05, "loss": 2.2814, "step": 3240 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.9267102214057335e-05, "loss": 2.2764, "step": 3241 }, { "epoch": 0.43, "grad_norm": 0.30859375, "learning_rate": 4.926659337529615e-05, "loss": 2.2838, "step": 3242 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.926608436258663e-05, "loss": 2.2888, "step": 3243 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.926557517593242e-05, "loss": 2.3123, "step": 3244 }, { "epoch": 0.43, "grad_norm": 0.291015625, "learning_rate": 4.9265065815337165e-05, "loss": 2.3063, "step": 3245 }, { "epoch": 0.43, "grad_norm": 0.314453125, "learning_rate": 4.9264556280804536e-05, "loss": 2.2778, "step": 3246 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.926404657233817e-05, "loss": 2.2898, "step": 3247 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.926353668994173e-05, "loss": 2.3254, "step": 3248 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.926302663361885e-05, "loss": 2.2882, "step": 3249 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.926251640337322e-05, "loss": 2.2722, "step": 3250 }, { "epoch": 0.43, "grad_norm": 0.2890625, "learning_rate": 4.926200599920846e-05, "loss": 2.262, "step": 3251 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.926149542112826e-05, "loss": 2.2666, "step": 3252 }, { "epoch": 0.43, "grad_norm": 0.3203125, "learning_rate": 4.9260984669136266e-05, "loss": 2.232, "step": 3253 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.926047374323615e-05, "loss": 2.2837, "step": 3254 }, { "epoch": 0.43, "grad_norm": 0.298828125, "learning_rate": 4.9259962643431545e-05, "loss": 2.2745, "step": 3255 }, { "epoch": 0.43, "grad_norm": 0.302734375, "learning_rate": 4.925945136972615e-05, "loss": 2.25, "step": 3256 }, { "epoch": 0.43, "grad_norm": 0.28515625, "learning_rate": 4.9258939922123614e-05, "loss": 2.2761, "step": 3257 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.9258428300627605e-05, "loss": 2.2701, "step": 3258 }, { "epoch": 0.43, "grad_norm": 0.306640625, "learning_rate": 4.925791650524178e-05, "loss": 2.2537, "step": 3259 }, { "epoch": 0.43, "grad_norm": 0.296875, "learning_rate": 4.925740453596982e-05, "loss": 2.2889, "step": 3260 }, { "epoch": 0.43, "grad_norm": 0.314453125, "learning_rate": 4.9256892392815396e-05, "loss": 2.2707, "step": 3261 }, { "epoch": 0.44, "grad_norm": 0.314453125, "learning_rate": 4.925638007578217e-05, "loss": 2.2696, "step": 3262 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.9255867584873824e-05, "loss": 2.3181, "step": 3263 }, { "epoch": 0.44, "grad_norm": 0.296875, "learning_rate": 4.9255354920094035e-05, "loss": 2.2351, "step": 3264 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.925484208144646e-05, "loss": 2.2762, "step": 3265 }, { "epoch": 0.44, "grad_norm": 0.322265625, "learning_rate": 4.925432906893479e-05, "loss": 2.2469, "step": 3266 }, { "epoch": 0.44, "grad_norm": 0.296875, "learning_rate": 4.925381588256269e-05, "loss": 2.2855, "step": 3267 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.9253302522333855e-05, "loss": 2.2571, "step": 3268 }, { "epoch": 0.44, "grad_norm": 0.30078125, "learning_rate": 4.925278898825195e-05, "loss": 2.2899, "step": 3269 }, { "epoch": 0.44, "grad_norm": 0.30078125, "learning_rate": 4.925227528032066e-05, "loss": 2.2725, "step": 3270 }, { "epoch": 0.44, "grad_norm": 0.3125, "learning_rate": 4.925176139854368e-05, "loss": 2.2744, "step": 3271 }, { "epoch": 0.44, "grad_norm": 0.296875, "learning_rate": 4.925124734292468e-05, "loss": 2.2852, "step": 3272 }, { "epoch": 0.44, "grad_norm": 0.314453125, "learning_rate": 4.925073311346734e-05, "loss": 2.2769, "step": 3273 }, { "epoch": 0.44, "grad_norm": 0.310546875, "learning_rate": 4.925021871017537e-05, "loss": 2.295, "step": 3274 }, { "epoch": 0.44, "grad_norm": 0.302734375, "learning_rate": 4.9249704133052425e-05, "loss": 2.2906, "step": 3275 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.924918938210222e-05, "loss": 2.2875, "step": 3276 }, { "epoch": 0.44, "grad_norm": 0.28515625, "learning_rate": 4.9248674457328434e-05, "loss": 2.262, "step": 3277 }, { "epoch": 0.44, "grad_norm": 0.3203125, "learning_rate": 4.924815935873476e-05, "loss": 2.3046, "step": 3278 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.924764408632488e-05, "loss": 2.2885, "step": 3279 }, { "epoch": 0.44, "grad_norm": 0.296875, "learning_rate": 4.92471286401025e-05, "loss": 2.2922, "step": 3280 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.924661302007132e-05, "loss": 2.2631, "step": 3281 }, { "epoch": 0.44, "grad_norm": 0.306640625, "learning_rate": 4.924609722623503e-05, "loss": 2.2977, "step": 3282 }, { "epoch": 0.44, "grad_norm": 0.28125, "learning_rate": 4.924558125859732e-05, "loss": 2.2869, "step": 3283 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.9245065117161893e-05, "loss": 2.264, "step": 3284 }, { "epoch": 0.44, "grad_norm": 0.28125, "learning_rate": 4.9244548801932445e-05, "loss": 2.3148, "step": 3285 }, { "epoch": 0.44, "grad_norm": 0.28515625, "learning_rate": 4.924403231291269e-05, "loss": 2.2772, "step": 3286 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.924351565010632e-05, "loss": 2.2842, "step": 3287 }, { "epoch": 0.44, "grad_norm": 0.298828125, "learning_rate": 4.9242998813517037e-05, "loss": 2.279, "step": 3288 }, { "epoch": 0.44, "grad_norm": 0.30859375, "learning_rate": 4.924248180314855e-05, "loss": 2.2703, "step": 3289 }, { "epoch": 0.44, "grad_norm": 0.28125, "learning_rate": 4.924196461900457e-05, "loss": 2.2694, "step": 3290 }, { "epoch": 0.44, "grad_norm": 0.30078125, "learning_rate": 4.92414472610888e-05, "loss": 2.2963, "step": 3291 }, { "epoch": 0.44, "grad_norm": 0.298828125, "learning_rate": 4.924092972940494e-05, "loss": 2.2545, "step": 3292 }, { "epoch": 0.44, "grad_norm": 0.287109375, "learning_rate": 4.924041202395671e-05, "loss": 2.2869, "step": 3293 }, { "epoch": 0.44, "grad_norm": 0.283203125, "learning_rate": 4.923989414474782e-05, "loss": 2.2783, "step": 3294 }, { "epoch": 0.44, "grad_norm": 0.30859375, "learning_rate": 4.923937609178198e-05, "loss": 2.2788, "step": 3295 }, { "epoch": 0.44, "grad_norm": 0.30859375, "learning_rate": 4.923885786506291e-05, "loss": 2.235, "step": 3296 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.9238339464594305e-05, "loss": 2.2642, "step": 3297 }, { "epoch": 0.44, "grad_norm": 0.28515625, "learning_rate": 4.9237820890379905e-05, "loss": 2.2844, "step": 3298 }, { "epoch": 0.44, "grad_norm": 0.28125, "learning_rate": 4.9237302142423416e-05, "loss": 2.2754, "step": 3299 }, { "epoch": 0.44, "grad_norm": 0.3046875, "learning_rate": 4.923678322072855e-05, "loss": 2.2887, "step": 3300 }, { "epoch": 0.44, "grad_norm": 0.30078125, "learning_rate": 4.923626412529904e-05, "loss": 2.3001, "step": 3301 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.92357448561386e-05, "loss": 2.2751, "step": 3302 }, { "epoch": 0.44, "grad_norm": 0.306640625, "learning_rate": 4.923522541325095e-05, "loss": 2.2552, "step": 3303 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.9234705796639825e-05, "loss": 2.2749, "step": 3304 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.923418600630894e-05, "loss": 2.2868, "step": 3305 }, { "epoch": 0.44, "grad_norm": 0.310546875, "learning_rate": 4.9233666042262014e-05, "loss": 2.2787, "step": 3306 }, { "epoch": 0.44, "grad_norm": 0.294921875, "learning_rate": 4.923314590450279e-05, "loss": 2.2693, "step": 3307 }, { "epoch": 0.44, "grad_norm": 0.3046875, "learning_rate": 4.923262559303499e-05, "loss": 2.2856, "step": 3308 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.923210510786234e-05, "loss": 2.248, "step": 3309 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.923158444898858e-05, "loss": 2.2901, "step": 3310 }, { "epoch": 0.44, "grad_norm": 0.31640625, "learning_rate": 4.923106361641743e-05, "loss": 2.2156, "step": 3311 }, { "epoch": 0.44, "grad_norm": 0.275390625, "learning_rate": 4.923054261015263e-05, "loss": 2.2942, "step": 3312 }, { "epoch": 0.44, "grad_norm": 0.306640625, "learning_rate": 4.9230021430197916e-05, "loss": 2.279, "step": 3313 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.922950007655702e-05, "loss": 2.2915, "step": 3314 }, { "epoch": 0.44, "grad_norm": 0.32421875, "learning_rate": 4.9228978549233685e-05, "loss": 2.2963, "step": 3315 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.922845684823164e-05, "loss": 2.2595, "step": 3316 }, { "epoch": 0.44, "grad_norm": 0.298828125, "learning_rate": 4.9227934973554634e-05, "loss": 2.2733, "step": 3317 }, { "epoch": 0.44, "grad_norm": 0.28515625, "learning_rate": 4.9227412925206406e-05, "loss": 2.2844, "step": 3318 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.92268907031907e-05, "loss": 2.2906, "step": 3319 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.922636830751125e-05, "loss": 2.264, "step": 3320 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.922584573817181e-05, "loss": 2.2769, "step": 3321 }, { "epoch": 0.44, "grad_norm": 0.3203125, "learning_rate": 4.922532299517612e-05, "loss": 2.2743, "step": 3322 }, { "epoch": 0.44, "grad_norm": 0.3046875, "learning_rate": 4.922480007852793e-05, "loss": 2.2778, "step": 3323 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.922427698823099e-05, "loss": 2.2679, "step": 3324 }, { "epoch": 0.44, "grad_norm": 0.29296875, "learning_rate": 4.922375372428904e-05, "loss": 2.2745, "step": 3325 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.922323028670585e-05, "loss": 2.274, "step": 3326 }, { "epoch": 0.44, "grad_norm": 0.306640625, "learning_rate": 4.9222706675485155e-05, "loss": 2.2356, "step": 3327 }, { "epoch": 0.44, "grad_norm": 0.2890625, "learning_rate": 4.922218289063072e-05, "loss": 2.2749, "step": 3328 }, { "epoch": 0.44, "grad_norm": 0.302734375, "learning_rate": 4.922165893214629e-05, "loss": 2.2916, "step": 3329 }, { "epoch": 0.44, "grad_norm": 0.291015625, "learning_rate": 4.922113480003562e-05, "loss": 2.2582, "step": 3330 }, { "epoch": 0.44, "grad_norm": 0.302734375, "learning_rate": 4.922061049430247e-05, "loss": 2.2771, "step": 3331 }, { "epoch": 0.44, "grad_norm": 0.298828125, "learning_rate": 4.922008601495061e-05, "loss": 2.3096, "step": 3332 }, { "epoch": 0.44, "grad_norm": 0.322265625, "learning_rate": 4.921956136198378e-05, "loss": 2.2593, "step": 3333 }, { "epoch": 0.44, "grad_norm": 0.3046875, "learning_rate": 4.921903653540575e-05, "loss": 2.2581, "step": 3334 }, { "epoch": 0.44, "grad_norm": 0.30078125, "learning_rate": 4.921851153522029e-05, "loss": 2.2765, "step": 3335 }, { "epoch": 0.44, "grad_norm": 0.322265625, "learning_rate": 4.921798636143115e-05, "loss": 2.2644, "step": 3336 }, { "epoch": 0.45, "grad_norm": 0.29296875, "learning_rate": 4.9217461014042106e-05, "loss": 2.2811, "step": 3337 }, { "epoch": 0.45, "grad_norm": 0.298828125, "learning_rate": 4.921693549305691e-05, "loss": 2.2499, "step": 3338 }, { "epoch": 0.45, "grad_norm": 0.318359375, "learning_rate": 4.921640979847935e-05, "loss": 2.2705, "step": 3339 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.921588393031317e-05, "loss": 2.2446, "step": 3340 }, { "epoch": 0.45, "grad_norm": 0.3203125, "learning_rate": 4.921535788856215e-05, "loss": 2.2795, "step": 3341 }, { "epoch": 0.45, "grad_norm": 0.296875, "learning_rate": 4.921483167323007e-05, "loss": 2.296, "step": 3342 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.921430528432069e-05, "loss": 2.2917, "step": 3343 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.921377872183779e-05, "loss": 2.2989, "step": 3344 }, { "epoch": 0.45, "grad_norm": 0.310546875, "learning_rate": 4.9213251985785134e-05, "loss": 2.287, "step": 3345 }, { "epoch": 0.45, "grad_norm": 0.314453125, "learning_rate": 4.921272507616651e-05, "loss": 2.2656, "step": 3346 }, { "epoch": 0.45, "grad_norm": 0.294921875, "learning_rate": 4.9212197992985684e-05, "loss": 2.273, "step": 3347 }, { "epoch": 0.45, "grad_norm": 0.298828125, "learning_rate": 4.921167073624645e-05, "loss": 2.2908, "step": 3348 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.921114330595258e-05, "loss": 2.2693, "step": 3349 }, { "epoch": 0.45, "grad_norm": 0.298828125, "learning_rate": 4.921061570210784e-05, "loss": 2.2635, "step": 3350 }, { "epoch": 0.45, "grad_norm": 0.294921875, "learning_rate": 4.921008792471604e-05, "loss": 2.2727, "step": 3351 }, { "epoch": 0.45, "grad_norm": 0.29296875, "learning_rate": 4.9209559973780937e-05, "loss": 2.2612, "step": 3352 }, { "epoch": 0.45, "grad_norm": 0.3046875, "learning_rate": 4.920903184930633e-05, "loss": 2.2805, "step": 3353 }, { "epoch": 0.45, "grad_norm": 0.306640625, "learning_rate": 4.9208503551296005e-05, "loss": 2.2702, "step": 3354 }, { "epoch": 0.45, "grad_norm": 0.314453125, "learning_rate": 4.920797507975374e-05, "loss": 2.2592, "step": 3355 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.9207446434683334e-05, "loss": 2.3031, "step": 3356 }, { "epoch": 0.45, "grad_norm": 0.294921875, "learning_rate": 4.9206917616088565e-05, "loss": 2.2961, "step": 3357 }, { "epoch": 0.45, "grad_norm": 0.296875, "learning_rate": 4.9206388623973235e-05, "loss": 2.2349, "step": 3358 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.920585945834112e-05, "loss": 2.2743, "step": 3359 }, { "epoch": 0.45, "grad_norm": 0.291015625, "learning_rate": 4.920533011919604e-05, "loss": 2.2803, "step": 3360 }, { "epoch": 0.45, "grad_norm": 0.31640625, "learning_rate": 4.9204800606541754e-05, "loss": 2.2687, "step": 3361 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.920427092038209e-05, "loss": 2.291, "step": 3362 }, { "epoch": 0.45, "grad_norm": 0.29296875, "learning_rate": 4.9203741060720834e-05, "loss": 2.2775, "step": 3363 }, { "epoch": 0.45, "grad_norm": 0.296875, "learning_rate": 4.920321102756177e-05, "loss": 2.2813, "step": 3364 }, { "epoch": 0.45, "grad_norm": 0.333984375, "learning_rate": 4.920268082090872e-05, "loss": 2.2847, "step": 3365 }, { "epoch": 0.45, "grad_norm": 0.306640625, "learning_rate": 4.920215044076546e-05, "loss": 2.3034, "step": 3366 }, { "epoch": 0.45, "grad_norm": 0.28515625, "learning_rate": 4.920161988713582e-05, "loss": 2.2865, "step": 3367 }, { "epoch": 0.45, "grad_norm": 0.294921875, "learning_rate": 4.920108916002358e-05, "loss": 2.2488, "step": 3368 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.920055825943256e-05, "loss": 2.2827, "step": 3369 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.920002718536655e-05, "loss": 2.2865, "step": 3370 }, { "epoch": 0.45, "grad_norm": 0.322265625, "learning_rate": 4.919949593782938e-05, "loss": 2.301, "step": 3371 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.919896451682483e-05, "loss": 2.2564, "step": 3372 }, { "epoch": 0.45, "grad_norm": 0.28125, "learning_rate": 4.919843292235673e-05, "loss": 2.2742, "step": 3373 }, { "epoch": 0.45, "grad_norm": 0.314453125, "learning_rate": 4.919790115442888e-05, "loss": 2.2636, "step": 3374 }, { "epoch": 0.45, "grad_norm": 0.296875, "learning_rate": 4.9197369213045096e-05, "loss": 2.2711, "step": 3375 }, { "epoch": 0.45, "grad_norm": 0.279296875, "learning_rate": 4.919683709820919e-05, "loss": 2.2409, "step": 3376 }, { "epoch": 0.45, "grad_norm": 0.30859375, "learning_rate": 4.919630480992498e-05, "loss": 2.2469, "step": 3377 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.9195772348196275e-05, "loss": 2.2982, "step": 3378 }, { "epoch": 0.45, "grad_norm": 0.28125, "learning_rate": 4.91952397130269e-05, "loss": 2.2597, "step": 3379 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.9194706904420664e-05, "loss": 2.2476, "step": 3380 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.919417392238139e-05, "loss": 2.2594, "step": 3381 }, { "epoch": 0.45, "grad_norm": 0.302734375, "learning_rate": 4.9193640766912906e-05, "loss": 2.2498, "step": 3382 }, { "epoch": 0.45, "grad_norm": 0.302734375, "learning_rate": 4.919310743801903e-05, "loss": 2.311, "step": 3383 }, { "epoch": 0.45, "grad_norm": 0.298828125, "learning_rate": 4.919257393570357e-05, "loss": 2.2822, "step": 3384 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.9192040259970365e-05, "loss": 2.2817, "step": 3385 }, { "epoch": 0.45, "grad_norm": 0.310546875, "learning_rate": 4.919150641082324e-05, "loss": 2.2538, "step": 3386 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.919097238826601e-05, "loss": 2.2785, "step": 3387 }, { "epoch": 0.45, "grad_norm": 0.310546875, "learning_rate": 4.919043819230253e-05, "loss": 2.2405, "step": 3388 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.91899038229366e-05, "loss": 2.2644, "step": 3389 }, { "epoch": 0.45, "grad_norm": 0.29296875, "learning_rate": 4.918936928017207e-05, "loss": 2.2491, "step": 3390 }, { "epoch": 0.45, "grad_norm": 0.28125, "learning_rate": 4.918883456401276e-05, "loss": 2.2556, "step": 3391 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.91882996744625e-05, "loss": 2.2675, "step": 3392 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.918776461152514e-05, "loss": 2.2794, "step": 3393 }, { "epoch": 0.45, "grad_norm": 0.283203125, "learning_rate": 4.91872293752045e-05, "loss": 2.2584, "step": 3394 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.9186693965504425e-05, "loss": 2.3028, "step": 3395 }, { "epoch": 0.45, "grad_norm": 0.294921875, "learning_rate": 4.918615838242876e-05, "loss": 2.2536, "step": 3396 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.918562262598133e-05, "loss": 2.2838, "step": 3397 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.918508669616597e-05, "loss": 2.2492, "step": 3398 }, { "epoch": 0.45, "grad_norm": 0.310546875, "learning_rate": 4.918455059298654e-05, "loss": 2.2991, "step": 3399 }, { "epoch": 0.45, "grad_norm": 0.3046875, "learning_rate": 4.918401431644688e-05, "loss": 2.2975, "step": 3400 }, { "epoch": 0.45, "grad_norm": 0.3125, "learning_rate": 4.9183477866550834e-05, "loss": 2.2931, "step": 3401 }, { "epoch": 0.45, "grad_norm": 0.3046875, "learning_rate": 4.918294124330223e-05, "loss": 2.2783, "step": 3402 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.918240444670493e-05, "loss": 2.3106, "step": 3403 }, { "epoch": 0.45, "grad_norm": 0.2890625, "learning_rate": 4.918186747676278e-05, "loss": 2.2863, "step": 3404 }, { "epoch": 0.45, "grad_norm": 0.287109375, "learning_rate": 4.9181330333479634e-05, "loss": 2.2684, "step": 3405 }, { "epoch": 0.45, "grad_norm": 0.29296875, "learning_rate": 4.918079301685933e-05, "loss": 2.2645, "step": 3406 }, { "epoch": 0.45, "grad_norm": 0.28125, "learning_rate": 4.918025552690573e-05, "loss": 2.2679, "step": 3407 }, { "epoch": 0.45, "grad_norm": 0.291015625, "learning_rate": 4.917971786362269e-05, "loss": 2.2872, "step": 3408 }, { "epoch": 0.45, "grad_norm": 0.296875, "learning_rate": 4.9179180027014046e-05, "loss": 2.3015, "step": 3409 }, { "epoch": 0.45, "grad_norm": 0.30078125, "learning_rate": 4.917864201708367e-05, "loss": 2.3067, "step": 3410 }, { "epoch": 0.45, "grad_norm": 0.291015625, "learning_rate": 4.917810383383541e-05, "loss": 2.2722, "step": 3411 }, { "epoch": 0.46, "grad_norm": 0.275390625, "learning_rate": 4.9177565477273134e-05, "loss": 2.2987, "step": 3412 }, { "epoch": 0.46, "grad_norm": 0.30078125, "learning_rate": 4.917702694740069e-05, "loss": 2.2905, "step": 3413 }, { "epoch": 0.46, "grad_norm": 0.306640625, "learning_rate": 4.917648824422194e-05, "loss": 2.2928, "step": 3414 }, { "epoch": 0.46, "grad_norm": 0.287109375, "learning_rate": 4.917594936774075e-05, "loss": 2.2623, "step": 3415 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.917541031796098e-05, "loss": 2.2623, "step": 3416 }, { "epoch": 0.46, "grad_norm": 0.30859375, "learning_rate": 4.91748710948865e-05, "loss": 2.2894, "step": 3417 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.917433169852116e-05, "loss": 2.2638, "step": 3418 }, { "epoch": 0.46, "grad_norm": 0.306640625, "learning_rate": 4.9173792128868844e-05, "loss": 2.2904, "step": 3419 }, { "epoch": 0.46, "grad_norm": 0.2890625, "learning_rate": 4.917325238593341e-05, "loss": 2.2476, "step": 3420 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.917271246971874e-05, "loss": 2.2696, "step": 3421 }, { "epoch": 0.46, "grad_norm": 0.3125, "learning_rate": 4.917217238022868e-05, "loss": 2.2749, "step": 3422 }, { "epoch": 0.46, "grad_norm": 0.3046875, "learning_rate": 4.917163211746712e-05, "loss": 2.2953, "step": 3423 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.917109168143793e-05, "loss": 2.2689, "step": 3424 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.917055107214498e-05, "loss": 2.3018, "step": 3425 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.917001028959214e-05, "loss": 2.3126, "step": 3426 }, { "epoch": 0.46, "grad_norm": 0.2890625, "learning_rate": 4.91694693337833e-05, "loss": 2.2585, "step": 3427 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.916892820472233e-05, "loss": 2.2856, "step": 3428 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.916838690241311e-05, "loss": 2.2736, "step": 3429 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.916784542685952e-05, "loss": 2.3062, "step": 3430 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.9167303778065445e-05, "loss": 2.2926, "step": 3431 }, { "epoch": 0.46, "grad_norm": 0.3046875, "learning_rate": 4.916676195603476e-05, "loss": 2.2715, "step": 3432 }, { "epoch": 0.46, "grad_norm": 0.30859375, "learning_rate": 4.9166219960771355e-05, "loss": 2.2811, "step": 3433 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.916567779227912e-05, "loss": 2.2761, "step": 3434 }, { "epoch": 0.46, "grad_norm": 0.3046875, "learning_rate": 4.916513545056193e-05, "loss": 2.2871, "step": 3435 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.916459293562367e-05, "loss": 2.2581, "step": 3436 }, { "epoch": 0.46, "grad_norm": 0.2890625, "learning_rate": 4.916405024746824e-05, "loss": 2.2737, "step": 3437 }, { "epoch": 0.46, "grad_norm": 0.28515625, "learning_rate": 4.916350738609953e-05, "loss": 2.2704, "step": 3438 }, { "epoch": 0.46, "grad_norm": 0.27734375, "learning_rate": 4.916296435152142e-05, "loss": 2.2928, "step": 3439 }, { "epoch": 0.46, "grad_norm": 0.28515625, "learning_rate": 4.9162421143737825e-05, "loss": 2.2814, "step": 3440 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.916187776275261e-05, "loss": 2.2788, "step": 3441 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.916133420856969e-05, "loss": 2.2772, "step": 3442 }, { "epoch": 0.46, "grad_norm": 0.341796875, "learning_rate": 4.916079048119295e-05, "loss": 2.2747, "step": 3443 }, { "epoch": 0.46, "grad_norm": 0.296875, "learning_rate": 4.91602465806263e-05, "loss": 2.3034, "step": 3444 }, { "epoch": 0.46, "grad_norm": 0.31640625, "learning_rate": 4.915970250687363e-05, "loss": 2.2453, "step": 3445 }, { "epoch": 0.46, "grad_norm": 0.310546875, "learning_rate": 4.915915825993885e-05, "loss": 2.2487, "step": 3446 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.915861383982584e-05, "loss": 2.2661, "step": 3447 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.915806924653852e-05, "loss": 2.2764, "step": 3448 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.9157524480080796e-05, "loss": 2.2941, "step": 3449 }, { "epoch": 0.46, "grad_norm": 0.287109375, "learning_rate": 4.915697954045656e-05, "loss": 2.2688, "step": 3450 }, { "epoch": 0.46, "grad_norm": 0.31640625, "learning_rate": 4.915643442766972e-05, "loss": 2.2825, "step": 3451 }, { "epoch": 0.46, "grad_norm": 0.28515625, "learning_rate": 4.9155889141724195e-05, "loss": 2.2646, "step": 3452 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.915534368262389e-05, "loss": 2.2842, "step": 3453 }, { "epoch": 0.46, "grad_norm": 0.296875, "learning_rate": 4.915479805037271e-05, "loss": 2.2869, "step": 3454 }, { "epoch": 0.46, "grad_norm": 0.2890625, "learning_rate": 4.9154252244974566e-05, "loss": 2.2936, "step": 3455 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.9153706266433366e-05, "loss": 2.2641, "step": 3456 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.915316011475304e-05, "loss": 2.2673, "step": 3457 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.915261378993749e-05, "loss": 2.2714, "step": 3458 }, { "epoch": 0.46, "grad_norm": 0.318359375, "learning_rate": 4.915206729199063e-05, "loss": 2.2884, "step": 3459 }, { "epoch": 0.46, "grad_norm": 0.30859375, "learning_rate": 4.915152062091639e-05, "loss": 2.2965, "step": 3460 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.9150973776718686e-05, "loss": 2.254, "step": 3461 }, { "epoch": 0.46, "grad_norm": 0.3125, "learning_rate": 4.9150426759401426e-05, "loss": 2.2777, "step": 3462 }, { "epoch": 0.46, "grad_norm": 0.3046875, "learning_rate": 4.9149879568968535e-05, "loss": 2.2531, "step": 3463 }, { "epoch": 0.46, "grad_norm": 0.287109375, "learning_rate": 4.914933220542395e-05, "loss": 2.2628, "step": 3464 }, { "epoch": 0.46, "grad_norm": 0.30078125, "learning_rate": 4.914878466877157e-05, "loss": 2.2399, "step": 3465 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.914823695901535e-05, "loss": 2.2555, "step": 3466 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.9147689076159186e-05, "loss": 2.2781, "step": 3467 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.914714102020702e-05, "loss": 2.2557, "step": 3468 }, { "epoch": 0.46, "grad_norm": 0.296875, "learning_rate": 4.914659279116279e-05, "loss": 2.2549, "step": 3469 }, { "epoch": 0.46, "grad_norm": 0.298828125, "learning_rate": 4.91460443890304e-05, "loss": 2.2612, "step": 3470 }, { "epoch": 0.46, "grad_norm": 0.328125, "learning_rate": 4.914549581381381e-05, "loss": 2.2779, "step": 3471 }, { "epoch": 0.46, "grad_norm": 0.306640625, "learning_rate": 4.9144947065516934e-05, "loss": 2.2686, "step": 3472 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.914439814414371e-05, "loss": 2.2597, "step": 3473 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.914384904969807e-05, "loss": 2.2971, "step": 3474 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.9143299782183954e-05, "loss": 2.2436, "step": 3475 }, { "epoch": 0.46, "grad_norm": 0.294921875, "learning_rate": 4.91427503416053e-05, "loss": 2.2726, "step": 3476 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.9142200727966046e-05, "loss": 2.259, "step": 3477 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.914165094127013e-05, "loss": 2.2696, "step": 3478 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.9141100981521494e-05, "loss": 2.311, "step": 3479 }, { "epoch": 0.46, "grad_norm": 0.3046875, "learning_rate": 4.914055084872408e-05, "loss": 2.2651, "step": 3480 }, { "epoch": 0.46, "grad_norm": 0.29296875, "learning_rate": 4.914000054288183e-05, "loss": 2.2812, "step": 3481 }, { "epoch": 0.46, "grad_norm": 0.302734375, "learning_rate": 4.913945006399868e-05, "loss": 2.2596, "step": 3482 }, { "epoch": 0.46, "grad_norm": 0.283203125, "learning_rate": 4.91388994120786e-05, "loss": 2.2891, "step": 3483 }, { "epoch": 0.46, "grad_norm": 0.291015625, "learning_rate": 4.913834858712552e-05, "loss": 2.2454, "step": 3484 }, { "epoch": 0.46, "grad_norm": 0.31640625, "learning_rate": 4.9137797589143386e-05, "loss": 2.3223, "step": 3485 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.913724641813616e-05, "loss": 2.2622, "step": 3486 }, { "epoch": 0.47, "grad_norm": 0.287109375, "learning_rate": 4.9136695074107786e-05, "loss": 2.2761, "step": 3487 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.913614355706221e-05, "loss": 2.2438, "step": 3488 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.9135591867003395e-05, "loss": 2.2751, "step": 3489 }, { "epoch": 0.47, "grad_norm": 0.28515625, "learning_rate": 4.9135040003935294e-05, "loss": 2.313, "step": 3490 }, { "epoch": 0.47, "grad_norm": 0.30078125, "learning_rate": 4.9134487967861865e-05, "loss": 2.2675, "step": 3491 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.913393575878705e-05, "loss": 2.3098, "step": 3492 }, { "epoch": 0.47, "grad_norm": 0.279296875, "learning_rate": 4.9133383376714827e-05, "loss": 2.2947, "step": 3493 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.9132830821649146e-05, "loss": 2.2403, "step": 3494 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.9132278093593966e-05, "loss": 2.3134, "step": 3495 }, { "epoch": 0.47, "grad_norm": 0.3125, "learning_rate": 4.913172519255326e-05, "loss": 2.2759, "step": 3496 }, { "epoch": 0.47, "grad_norm": 0.3046875, "learning_rate": 4.913117211853098e-05, "loss": 2.3053, "step": 3497 }, { "epoch": 0.47, "grad_norm": 0.287109375, "learning_rate": 4.913061887153109e-05, "loss": 2.2557, "step": 3498 }, { "epoch": 0.47, "grad_norm": 0.291015625, "learning_rate": 4.9130065451557564e-05, "loss": 2.282, "step": 3499 }, { "epoch": 0.47, "grad_norm": 0.31640625, "learning_rate": 4.912951185861436e-05, "loss": 2.274, "step": 3500 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.9128958092705454e-05, "loss": 2.2888, "step": 3501 }, { "epoch": 0.47, "grad_norm": 0.302734375, "learning_rate": 4.912840415383482e-05, "loss": 2.2912, "step": 3502 }, { "epoch": 0.47, "grad_norm": 0.3046875, "learning_rate": 4.912785004200641e-05, "loss": 2.3118, "step": 3503 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.912729575722421e-05, "loss": 2.2742, "step": 3504 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.91267412994922e-05, "loss": 2.2695, "step": 3505 }, { "epoch": 0.47, "grad_norm": 0.3359375, "learning_rate": 4.9126186668814334e-05, "loss": 2.2745, "step": 3506 }, { "epoch": 0.47, "grad_norm": 0.28125, "learning_rate": 4.91256318651946e-05, "loss": 2.2732, "step": 3507 }, { "epoch": 0.47, "grad_norm": 0.30859375, "learning_rate": 4.912507688863698e-05, "loss": 2.2226, "step": 3508 }, { "epoch": 0.47, "grad_norm": 0.3203125, "learning_rate": 4.9124521739145444e-05, "loss": 2.2695, "step": 3509 }, { "epoch": 0.47, "grad_norm": 0.314453125, "learning_rate": 4.9123966416723974e-05, "loss": 2.2793, "step": 3510 }, { "epoch": 0.47, "grad_norm": 0.294921875, "learning_rate": 4.9123410921376547e-05, "loss": 2.2854, "step": 3511 }, { "epoch": 0.47, "grad_norm": 0.31640625, "learning_rate": 4.912285525310715e-05, "loss": 2.2497, "step": 3512 }, { "epoch": 0.47, "grad_norm": 0.287109375, "learning_rate": 4.9122299411919765e-05, "loss": 2.2778, "step": 3513 }, { "epoch": 0.47, "grad_norm": 0.302734375, "learning_rate": 4.9121743397818374e-05, "loss": 2.2894, "step": 3514 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.912118721080696e-05, "loss": 2.2936, "step": 3515 }, { "epoch": 0.47, "grad_norm": 0.306640625, "learning_rate": 4.912063085088953e-05, "loss": 2.2827, "step": 3516 }, { "epoch": 0.47, "grad_norm": 0.3125, "learning_rate": 4.912007431807004e-05, "loss": 2.2666, "step": 3517 }, { "epoch": 0.47, "grad_norm": 0.30078125, "learning_rate": 4.91195176123525e-05, "loss": 2.2802, "step": 3518 }, { "epoch": 0.47, "grad_norm": 0.3203125, "learning_rate": 4.9118960733740894e-05, "loss": 2.2736, "step": 3519 }, { "epoch": 0.47, "grad_norm": 0.2890625, "learning_rate": 4.911840368223922e-05, "loss": 2.2484, "step": 3520 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.911784645785147e-05, "loss": 2.2401, "step": 3521 }, { "epoch": 0.47, "grad_norm": 0.302734375, "learning_rate": 4.911728906058163e-05, "loss": 2.271, "step": 3522 }, { "epoch": 0.47, "grad_norm": 0.275390625, "learning_rate": 4.91167314904337e-05, "loss": 2.2498, "step": 3523 }, { "epoch": 0.47, "grad_norm": 0.283203125, "learning_rate": 4.911617374741168e-05, "loss": 2.2505, "step": 3524 }, { "epoch": 0.47, "grad_norm": 0.3046875, "learning_rate": 4.911561583151957e-05, "loss": 2.3011, "step": 3525 }, { "epoch": 0.47, "grad_norm": 0.294921875, "learning_rate": 4.9115057742761364e-05, "loss": 2.2509, "step": 3526 }, { "epoch": 0.47, "grad_norm": 0.314453125, "learning_rate": 4.911449948114105e-05, "loss": 2.2761, "step": 3527 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.911394104666266e-05, "loss": 2.2738, "step": 3528 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.9113382439330166e-05, "loss": 2.3064, "step": 3529 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.91128236591476e-05, "loss": 2.2756, "step": 3530 }, { "epoch": 0.47, "grad_norm": 0.3046875, "learning_rate": 4.911226470611895e-05, "loss": 2.2618, "step": 3531 }, { "epoch": 0.47, "grad_norm": 0.28515625, "learning_rate": 4.9111705580248224e-05, "loss": 2.2829, "step": 3532 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.911114628153943e-05, "loss": 2.2504, "step": 3533 }, { "epoch": 0.47, "grad_norm": 0.310546875, "learning_rate": 4.9110586809996586e-05, "loss": 2.2879, "step": 3534 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.9110027165623696e-05, "loss": 2.2899, "step": 3535 }, { "epoch": 0.47, "grad_norm": 0.3125, "learning_rate": 4.9109467348424764e-05, "loss": 2.2858, "step": 3536 }, { "epoch": 0.47, "grad_norm": 0.306640625, "learning_rate": 4.9108907358403824e-05, "loss": 2.2664, "step": 3537 }, { "epoch": 0.47, "grad_norm": 0.3125, "learning_rate": 4.9108347195564865e-05, "loss": 2.2723, "step": 3538 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.910778685991192e-05, "loss": 2.2803, "step": 3539 }, { "epoch": 0.47, "grad_norm": 0.30078125, "learning_rate": 4.9107226351449e-05, "loss": 2.2502, "step": 3540 }, { "epoch": 0.47, "grad_norm": 0.30859375, "learning_rate": 4.910666567018012e-05, "loss": 2.2541, "step": 3541 }, { "epoch": 0.47, "grad_norm": 0.28515625, "learning_rate": 4.91061048161093e-05, "loss": 2.2837, "step": 3542 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.910554378924056e-05, "loss": 2.2731, "step": 3543 }, { "epoch": 0.47, "grad_norm": 0.29296875, "learning_rate": 4.910498258957793e-05, "loss": 2.2742, "step": 3544 }, { "epoch": 0.47, "grad_norm": 0.287109375, "learning_rate": 4.910442121712543e-05, "loss": 2.2745, "step": 3545 }, { "epoch": 0.47, "grad_norm": 0.318359375, "learning_rate": 4.910385967188707e-05, "loss": 2.2758, "step": 3546 }, { "epoch": 0.47, "grad_norm": 0.310546875, "learning_rate": 4.910329795386689e-05, "loss": 2.2648, "step": 3547 }, { "epoch": 0.47, "grad_norm": 0.310546875, "learning_rate": 4.910273606306891e-05, "loss": 2.2506, "step": 3548 }, { "epoch": 0.47, "grad_norm": 0.310546875, "learning_rate": 4.910217399949717e-05, "loss": 2.2706, "step": 3549 }, { "epoch": 0.47, "grad_norm": 0.283203125, "learning_rate": 4.910161176315568e-05, "loss": 2.2625, "step": 3550 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.9101049354048486e-05, "loss": 2.2653, "step": 3551 }, { "epoch": 0.47, "grad_norm": 0.302734375, "learning_rate": 4.9100486772179606e-05, "loss": 2.2825, "step": 3552 }, { "epoch": 0.47, "grad_norm": 0.322265625, "learning_rate": 4.909992401755308e-05, "loss": 2.2948, "step": 3553 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.9099361090172945e-05, "loss": 2.2636, "step": 3554 }, { "epoch": 0.47, "grad_norm": 0.296875, "learning_rate": 4.9098797990043236e-05, "loss": 2.2508, "step": 3555 }, { "epoch": 0.47, "grad_norm": 0.30078125, "learning_rate": 4.909823471716797e-05, "loss": 2.2978, "step": 3556 }, { "epoch": 0.47, "grad_norm": 0.298828125, "learning_rate": 4.909767127155122e-05, "loss": 2.2759, "step": 3557 }, { "epoch": 0.47, "grad_norm": 0.294921875, "learning_rate": 4.909710765319699e-05, "loss": 2.2721, "step": 3558 }, { "epoch": 0.47, "grad_norm": 0.279296875, "learning_rate": 4.9096543862109345e-05, "loss": 2.2538, "step": 3559 }, { "epoch": 0.47, "grad_norm": 0.275390625, "learning_rate": 4.909597989829231e-05, "loss": 2.262, "step": 3560 }, { "epoch": 0.48, "grad_norm": 0.30859375, "learning_rate": 4.909541576174995e-05, "loss": 2.2539, "step": 3561 }, { "epoch": 0.48, "grad_norm": 0.30078125, "learning_rate": 4.909485145248628e-05, "loss": 2.2812, "step": 3562 }, { "epoch": 0.48, "grad_norm": 0.291015625, "learning_rate": 4.9094286970505364e-05, "loss": 2.2958, "step": 3563 }, { "epoch": 0.48, "grad_norm": 0.298828125, "learning_rate": 4.909372231581124e-05, "loss": 2.2782, "step": 3564 }, { "epoch": 0.48, "grad_norm": 0.298828125, "learning_rate": 4.909315748840796e-05, "loss": 2.2669, "step": 3565 }, { "epoch": 0.48, "grad_norm": 0.32421875, "learning_rate": 4.909259248829957e-05, "loss": 2.2464, "step": 3566 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.9092027315490124e-05, "loss": 2.2846, "step": 3567 }, { "epoch": 0.48, "grad_norm": 0.31640625, "learning_rate": 4.9091461969983665e-05, "loss": 2.2797, "step": 3568 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9090896451784264e-05, "loss": 2.2594, "step": 3569 }, { "epoch": 0.48, "grad_norm": 0.306640625, "learning_rate": 4.909033076089595e-05, "loss": 2.2841, "step": 3570 }, { "epoch": 0.48, "grad_norm": 0.294921875, "learning_rate": 4.908976489732279e-05, "loss": 2.2986, "step": 3571 }, { "epoch": 0.48, "grad_norm": 0.3046875, "learning_rate": 4.9089198861068844e-05, "loss": 2.2701, "step": 3572 }, { "epoch": 0.48, "grad_norm": 0.337890625, "learning_rate": 4.9088632652138164e-05, "loss": 2.2723, "step": 3573 }, { "epoch": 0.48, "grad_norm": 0.318359375, "learning_rate": 4.908806627053482e-05, "loss": 2.2712, "step": 3574 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.9087499716262855e-05, "loss": 2.3084, "step": 3575 }, { "epoch": 0.48, "grad_norm": 0.3046875, "learning_rate": 4.908693298932634e-05, "loss": 2.2761, "step": 3576 }, { "epoch": 0.48, "grad_norm": 0.30859375, "learning_rate": 4.908636608972932e-05, "loss": 2.2722, "step": 3577 }, { "epoch": 0.48, "grad_norm": 0.30078125, "learning_rate": 4.908579901747589e-05, "loss": 2.3017, "step": 3578 }, { "epoch": 0.48, "grad_norm": 0.330078125, "learning_rate": 4.90852317725701e-05, "loss": 2.2893, "step": 3579 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.908466435501601e-05, "loss": 2.2971, "step": 3580 }, { "epoch": 0.48, "grad_norm": 0.2890625, "learning_rate": 4.908409676481769e-05, "loss": 2.2941, "step": 3581 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.908352900197921e-05, "loss": 2.2816, "step": 3582 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.908296106650464e-05, "loss": 2.2948, "step": 3583 }, { "epoch": 0.48, "grad_norm": 0.30078125, "learning_rate": 4.9082392958398055e-05, "loss": 2.2724, "step": 3584 }, { "epoch": 0.48, "grad_norm": 0.31640625, "learning_rate": 4.9081824677663526e-05, "loss": 2.2869, "step": 3585 }, { "epoch": 0.48, "grad_norm": 0.298828125, "learning_rate": 4.908125622430512e-05, "loss": 2.2422, "step": 3586 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.908068759832691e-05, "loss": 2.2722, "step": 3587 }, { "epoch": 0.48, "grad_norm": 0.314453125, "learning_rate": 4.908011879973299e-05, "loss": 2.2716, "step": 3588 }, { "epoch": 0.48, "grad_norm": 0.283203125, "learning_rate": 4.907954982852743e-05, "loss": 2.2764, "step": 3589 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.907898068471429e-05, "loss": 2.287, "step": 3590 }, { "epoch": 0.48, "grad_norm": 0.3046875, "learning_rate": 4.9078411368297675e-05, "loss": 2.278, "step": 3591 }, { "epoch": 0.48, "grad_norm": 0.28125, "learning_rate": 4.9077841879281645e-05, "loss": 2.2542, "step": 3592 }, { "epoch": 0.48, "grad_norm": 0.28125, "learning_rate": 4.90772722176703e-05, "loss": 2.2722, "step": 3593 }, { "epoch": 0.48, "grad_norm": 0.298828125, "learning_rate": 4.907670238346771e-05, "loss": 2.2908, "step": 3594 }, { "epoch": 0.48, "grad_norm": 0.291015625, "learning_rate": 4.907613237667796e-05, "loss": 2.2641, "step": 3595 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.9075562197305155e-05, "loss": 2.3102, "step": 3596 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.9074991845353365e-05, "loss": 2.2521, "step": 3597 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.907442132082668e-05, "loss": 2.2709, "step": 3598 }, { "epoch": 0.48, "grad_norm": 0.275390625, "learning_rate": 4.907385062372919e-05, "loss": 2.2617, "step": 3599 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.907327975406498e-05, "loss": 2.2919, "step": 3600 }, { "epoch": 0.48, "grad_norm": 0.27734375, "learning_rate": 4.907270871183816e-05, "loss": 2.2857, "step": 3601 }, { "epoch": 0.48, "grad_norm": 0.28125, "learning_rate": 4.907213749705281e-05, "loss": 2.2771, "step": 3602 }, { "epoch": 0.48, "grad_norm": 0.294921875, "learning_rate": 4.9071566109713027e-05, "loss": 2.2817, "step": 3603 }, { "epoch": 0.48, "grad_norm": 0.28515625, "learning_rate": 4.907099454982291e-05, "loss": 2.2668, "step": 3604 }, { "epoch": 0.48, "grad_norm": 0.291015625, "learning_rate": 4.9070422817386554e-05, "loss": 2.313, "step": 3605 }, { "epoch": 0.48, "grad_norm": 0.275390625, "learning_rate": 4.906985091240805e-05, "loss": 2.2621, "step": 3606 }, { "epoch": 0.48, "grad_norm": 0.291015625, "learning_rate": 4.906927883489151e-05, "loss": 2.2777, "step": 3607 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.906870658484103e-05, "loss": 2.3217, "step": 3608 }, { "epoch": 0.48, "grad_norm": 0.2890625, "learning_rate": 4.9068134162260706e-05, "loss": 2.2419, "step": 3609 }, { "epoch": 0.48, "grad_norm": 0.306640625, "learning_rate": 4.906756156715464e-05, "loss": 2.2738, "step": 3610 }, { "epoch": 0.48, "grad_norm": 0.310546875, "learning_rate": 4.906698879952695e-05, "loss": 2.2804, "step": 3611 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.906641585938174e-05, "loss": 2.2665, "step": 3612 }, { "epoch": 0.48, "grad_norm": 0.3203125, "learning_rate": 4.90658427467231e-05, "loss": 2.2698, "step": 3613 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9065269461555166e-05, "loss": 2.2691, "step": 3614 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9064696003882013e-05, "loss": 2.2656, "step": 3615 }, { "epoch": 0.48, "grad_norm": 0.31640625, "learning_rate": 4.9064122373707775e-05, "loss": 2.2983, "step": 3616 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9063548571036565e-05, "loss": 2.2768, "step": 3617 }, { "epoch": 0.48, "grad_norm": 0.322265625, "learning_rate": 4.906297459587248e-05, "loss": 2.2642, "step": 3618 }, { "epoch": 0.48, "grad_norm": 0.314453125, "learning_rate": 4.906240044821966e-05, "loss": 2.2778, "step": 3619 }, { "epoch": 0.48, "grad_norm": 0.291015625, "learning_rate": 4.9061826128082186e-05, "loss": 2.3024, "step": 3620 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9061251635464205e-05, "loss": 2.2913, "step": 3621 }, { "epoch": 0.48, "grad_norm": 0.29296875, "learning_rate": 4.9060676970369825e-05, "loss": 2.3103, "step": 3622 }, { "epoch": 0.48, "grad_norm": 0.283203125, "learning_rate": 4.906010213280315e-05, "loss": 2.2972, "step": 3623 }, { "epoch": 0.48, "grad_norm": 0.287109375, "learning_rate": 4.9059527122768326e-05, "loss": 2.2981, "step": 3624 }, { "epoch": 0.48, "grad_norm": 0.298828125, "learning_rate": 4.905895194026947e-05, "loss": 2.2691, "step": 3625 }, { "epoch": 0.48, "grad_norm": 0.302734375, "learning_rate": 4.905837658531068e-05, "loss": 2.3028, "step": 3626 }, { "epoch": 0.48, "grad_norm": 0.31640625, "learning_rate": 4.905780105789611e-05, "loss": 2.3028, "step": 3627 }, { "epoch": 0.48, "grad_norm": 0.310546875, "learning_rate": 4.905722535802988e-05, "loss": 2.2734, "step": 3628 }, { "epoch": 0.48, "grad_norm": 0.30859375, "learning_rate": 4.9056649485716096e-05, "loss": 2.2608, "step": 3629 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.9056073440958914e-05, "loss": 2.2519, "step": 3630 }, { "epoch": 0.48, "grad_norm": 0.296875, "learning_rate": 4.9055497223762445e-05, "loss": 2.2765, "step": 3631 }, { "epoch": 0.48, "grad_norm": 0.283203125, "learning_rate": 4.9054920834130824e-05, "loss": 2.3268, "step": 3632 }, { "epoch": 0.48, "grad_norm": 0.306640625, "learning_rate": 4.905434427206818e-05, "loss": 2.2828, "step": 3633 }, { "epoch": 0.48, "grad_norm": 0.2890625, "learning_rate": 4.905376753757865e-05, "loss": 2.2461, "step": 3634 }, { "epoch": 0.48, "grad_norm": 0.306640625, "learning_rate": 4.9053190630666376e-05, "loss": 2.2657, "step": 3635 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.905261355133548e-05, "loss": 2.2968, "step": 3636 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.90520362995901e-05, "loss": 2.3054, "step": 3637 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.905145887543438e-05, "loss": 2.2661, "step": 3638 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.905088127887245e-05, "loss": 2.3012, "step": 3639 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.905030350990847e-05, "loss": 2.2718, "step": 3640 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.904972556854655e-05, "loss": 2.247, "step": 3641 }, { "epoch": 0.49, "grad_norm": 0.310546875, "learning_rate": 4.904914745479087e-05, "loss": 2.2494, "step": 3642 }, { "epoch": 0.49, "grad_norm": 0.287109375, "learning_rate": 4.904856916864554e-05, "loss": 2.2745, "step": 3643 }, { "epoch": 0.49, "grad_norm": 0.2734375, "learning_rate": 4.9047990710114735e-05, "loss": 2.2746, "step": 3644 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.9047412079202575e-05, "loss": 2.2732, "step": 3645 }, { "epoch": 0.49, "grad_norm": 0.310546875, "learning_rate": 4.904683327591322e-05, "loss": 2.2476, "step": 3646 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.9046254300250824e-05, "loss": 2.2619, "step": 3647 }, { "epoch": 0.49, "grad_norm": 0.28515625, "learning_rate": 4.9045675152219534e-05, "loss": 2.2958, "step": 3648 }, { "epoch": 0.49, "grad_norm": 0.279296875, "learning_rate": 4.90450958318235e-05, "loss": 2.284, "step": 3649 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.904451633906686e-05, "loss": 2.2679, "step": 3650 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.90439366739538e-05, "loss": 2.2648, "step": 3651 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.904335683648844e-05, "loss": 2.257, "step": 3652 }, { "epoch": 0.49, "grad_norm": 0.30859375, "learning_rate": 4.904277682667496e-05, "loss": 2.2732, "step": 3653 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.9042196644517526e-05, "loss": 2.2421, "step": 3654 }, { "epoch": 0.49, "grad_norm": 0.291015625, "learning_rate": 4.9041616290020265e-05, "loss": 2.2634, "step": 3655 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.904103576318736e-05, "loss": 2.2689, "step": 3656 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.904045506402296e-05, "loss": 2.2759, "step": 3657 }, { "epoch": 0.49, "grad_norm": 0.306640625, "learning_rate": 4.9039874192531246e-05, "loss": 2.2866, "step": 3658 }, { "epoch": 0.49, "grad_norm": 0.310546875, "learning_rate": 4.903929314871636e-05, "loss": 2.2695, "step": 3659 }, { "epoch": 0.49, "grad_norm": 0.2890625, "learning_rate": 4.903871193258248e-05, "loss": 2.2514, "step": 3660 }, { "epoch": 0.49, "grad_norm": 0.296875, "learning_rate": 4.903813054413377e-05, "loss": 2.2521, "step": 3661 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.903754898337439e-05, "loss": 2.2826, "step": 3662 }, { "epoch": 0.49, "grad_norm": 0.306640625, "learning_rate": 4.903696725030853e-05, "loss": 2.3069, "step": 3663 }, { "epoch": 0.49, "grad_norm": 0.291015625, "learning_rate": 4.903638534494034e-05, "loss": 2.303, "step": 3664 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.9035803267273986e-05, "loss": 2.2619, "step": 3665 }, { "epoch": 0.49, "grad_norm": 0.30859375, "learning_rate": 4.903522101731366e-05, "loss": 2.2934, "step": 3666 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.903463859506353e-05, "loss": 2.2863, "step": 3667 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.903405600052776e-05, "loss": 2.26, "step": 3668 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.903347323371054e-05, "loss": 2.2956, "step": 3669 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.903289029461604e-05, "loss": 2.2581, "step": 3670 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.9032307183248437e-05, "loss": 2.3018, "step": 3671 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.903172389961192e-05, "loss": 2.2822, "step": 3672 }, { "epoch": 0.49, "grad_norm": 0.3125, "learning_rate": 4.903114044371066e-05, "loss": 2.2673, "step": 3673 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.9030556815548845e-05, "loss": 2.267, "step": 3674 }, { "epoch": 0.49, "grad_norm": 0.31640625, "learning_rate": 4.902997301513066e-05, "loss": 2.2977, "step": 3675 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.9029389042460276e-05, "loss": 2.2865, "step": 3676 }, { "epoch": 0.49, "grad_norm": 0.384765625, "learning_rate": 4.90288048975419e-05, "loss": 2.2665, "step": 3677 }, { "epoch": 0.49, "grad_norm": 0.296875, "learning_rate": 4.9028220580379705e-05, "loss": 2.253, "step": 3678 }, { "epoch": 0.49, "grad_norm": 0.291015625, "learning_rate": 4.902763609097789e-05, "loss": 2.3132, "step": 3679 }, { "epoch": 0.49, "grad_norm": 0.298828125, "learning_rate": 4.902705142934063e-05, "loss": 2.2264, "step": 3680 }, { "epoch": 0.49, "grad_norm": 0.318359375, "learning_rate": 4.902646659547212e-05, "loss": 2.2579, "step": 3681 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.9025881589376566e-05, "loss": 2.2699, "step": 3682 }, { "epoch": 0.49, "grad_norm": 0.28515625, "learning_rate": 4.902529641105815e-05, "loss": 2.2888, "step": 3683 }, { "epoch": 0.49, "grad_norm": 0.310546875, "learning_rate": 4.902471106052107e-05, "loss": 2.2876, "step": 3684 }, { "epoch": 0.49, "grad_norm": 0.28515625, "learning_rate": 4.9024125537769514e-05, "loss": 2.2869, "step": 3685 }, { "epoch": 0.49, "grad_norm": 0.287109375, "learning_rate": 4.90235398428077e-05, "loss": 2.2448, "step": 3686 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.9022953975639795e-05, "loss": 2.2625, "step": 3687 }, { "epoch": 0.49, "grad_norm": 0.310546875, "learning_rate": 4.9022367936270026e-05, "loss": 2.2669, "step": 3688 }, { "epoch": 0.49, "grad_norm": 0.287109375, "learning_rate": 4.902178172470258e-05, "loss": 2.285, "step": 3689 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.902119534094166e-05, "loss": 2.2768, "step": 3690 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.902060878499148e-05, "loss": 2.2634, "step": 3691 }, { "epoch": 0.49, "grad_norm": 0.306640625, "learning_rate": 4.902002205685623e-05, "loss": 2.2795, "step": 3692 }, { "epoch": 0.49, "grad_norm": 0.294921875, "learning_rate": 4.901943515654013e-05, "loss": 2.2542, "step": 3693 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.9018848084047376e-05, "loss": 2.2726, "step": 3694 }, { "epoch": 0.49, "grad_norm": 0.298828125, "learning_rate": 4.901826083938218e-05, "loss": 2.2802, "step": 3695 }, { "epoch": 0.49, "grad_norm": 0.29296875, "learning_rate": 4.901767342254875e-05, "loss": 2.2791, "step": 3696 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.901708583355129e-05, "loss": 2.2862, "step": 3697 }, { "epoch": 0.49, "grad_norm": 0.302734375, "learning_rate": 4.901649807239403e-05, "loss": 2.2976, "step": 3698 }, { "epoch": 0.49, "grad_norm": 0.28515625, "learning_rate": 4.901591013908117e-05, "loss": 2.2398, "step": 3699 }, { "epoch": 0.49, "grad_norm": 0.306640625, "learning_rate": 4.901532203361693e-05, "loss": 2.2449, "step": 3700 }, { "epoch": 0.49, "grad_norm": 0.28515625, "learning_rate": 4.901473375600552e-05, "loss": 2.2485, "step": 3701 }, { "epoch": 0.49, "grad_norm": 0.306640625, "learning_rate": 4.9014145306251164e-05, "loss": 2.2881, "step": 3702 }, { "epoch": 0.49, "grad_norm": 0.31640625, "learning_rate": 4.9013556684358075e-05, "loss": 2.2829, "step": 3703 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.901296789033047e-05, "loss": 2.261, "step": 3704 }, { "epoch": 0.49, "grad_norm": 0.296875, "learning_rate": 4.9012378924172575e-05, "loss": 2.288, "step": 3705 }, { "epoch": 0.49, "grad_norm": 0.30078125, "learning_rate": 4.9011789785888614e-05, "loss": 2.2939, "step": 3706 }, { "epoch": 0.49, "grad_norm": 0.298828125, "learning_rate": 4.90112004754828e-05, "loss": 2.2774, "step": 3707 }, { "epoch": 0.49, "grad_norm": 0.2890625, "learning_rate": 4.901061099295937e-05, "loss": 2.2802, "step": 3708 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.9010021338322534e-05, "loss": 2.2837, "step": 3709 }, { "epoch": 0.49, "grad_norm": 0.3046875, "learning_rate": 4.9009431511576536e-05, "loss": 2.269, "step": 3710 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.9008841512725594e-05, "loss": 2.2626, "step": 3711 }, { "epoch": 0.5, "grad_norm": 0.302734375, "learning_rate": 4.9008251341773934e-05, "loss": 2.2638, "step": 3712 }, { "epoch": 0.5, "grad_norm": 0.279296875, "learning_rate": 4.90076609987258e-05, "loss": 2.2635, "step": 3713 }, { "epoch": 0.5, "grad_norm": 0.275390625, "learning_rate": 4.900707048358541e-05, "loss": 2.2631, "step": 3714 }, { "epoch": 0.5, "grad_norm": 0.28125, "learning_rate": 4.900647979635701e-05, "loss": 2.2866, "step": 3715 }, { "epoch": 0.5, "grad_norm": 0.28515625, "learning_rate": 4.900588893704482e-05, "loss": 2.2573, "step": 3716 }, { "epoch": 0.5, "grad_norm": 0.29296875, "learning_rate": 4.900529790565308e-05, "loss": 2.2709, "step": 3717 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.900470670218603e-05, "loss": 2.2835, "step": 3718 }, { "epoch": 0.5, "grad_norm": 0.298828125, "learning_rate": 4.9004115326647904e-05, "loss": 2.27, "step": 3719 }, { "epoch": 0.5, "grad_norm": 0.298828125, "learning_rate": 4.900352377904295e-05, "loss": 2.2616, "step": 3720 }, { "epoch": 0.5, "grad_norm": 0.3046875, "learning_rate": 4.9002932059375396e-05, "loss": 2.2878, "step": 3721 }, { "epoch": 0.5, "grad_norm": 0.287109375, "learning_rate": 4.9002340167649483e-05, "loss": 2.2485, "step": 3722 }, { "epoch": 0.5, "grad_norm": 0.2890625, "learning_rate": 4.900174810386947e-05, "loss": 2.2278, "step": 3723 }, { "epoch": 0.5, "grad_norm": 0.3046875, "learning_rate": 4.900115586803959e-05, "loss": 2.288, "step": 3724 }, { "epoch": 0.5, "grad_norm": 0.30859375, "learning_rate": 4.90005634601641e-05, "loss": 2.2662, "step": 3725 }, { "epoch": 0.5, "grad_norm": 0.310546875, "learning_rate": 4.899997088024721e-05, "loss": 2.2675, "step": 3726 }, { "epoch": 0.5, "grad_norm": 0.279296875, "learning_rate": 4.899937812829321e-05, "loss": 2.2807, "step": 3727 }, { "epoch": 0.5, "grad_norm": 0.28515625, "learning_rate": 4.8998785204306334e-05, "loss": 2.2842, "step": 3728 }, { "epoch": 0.5, "grad_norm": 0.310546875, "learning_rate": 4.899819210829083e-05, "loss": 2.2771, "step": 3729 }, { "epoch": 0.5, "grad_norm": 0.283203125, "learning_rate": 4.8997598840250945e-05, "loss": 2.2721, "step": 3730 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.899700540019094e-05, "loss": 2.2972, "step": 3731 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.899641178811507e-05, "loss": 2.3266, "step": 3732 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.8995818004027586e-05, "loss": 2.2768, "step": 3733 }, { "epoch": 0.5, "grad_norm": 0.2890625, "learning_rate": 4.899522404793274e-05, "loss": 2.2787, "step": 3734 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.899462991983479e-05, "loss": 2.3129, "step": 3735 }, { "epoch": 0.5, "grad_norm": 0.326171875, "learning_rate": 4.899403561973801e-05, "loss": 2.2676, "step": 3736 }, { "epoch": 0.5, "grad_norm": 0.287109375, "learning_rate": 4.899344114764664e-05, "loss": 2.2766, "step": 3737 }, { "epoch": 0.5, "grad_norm": 0.318359375, "learning_rate": 4.899284650356495e-05, "loss": 2.2609, "step": 3738 }, { "epoch": 0.5, "grad_norm": 0.30859375, "learning_rate": 4.8992251687497207e-05, "loss": 2.2788, "step": 3739 }, { "epoch": 0.5, "grad_norm": 0.298828125, "learning_rate": 4.8991656699447675e-05, "loss": 2.2801, "step": 3740 }, { "epoch": 0.5, "grad_norm": 0.29296875, "learning_rate": 4.8991061539420606e-05, "loss": 2.242, "step": 3741 }, { "epoch": 0.5, "grad_norm": 0.291015625, "learning_rate": 4.899046620742028e-05, "loss": 2.2529, "step": 3742 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.898987070345096e-05, "loss": 2.2783, "step": 3743 }, { "epoch": 0.5, "grad_norm": 0.29296875, "learning_rate": 4.898927502751691e-05, "loss": 2.2631, "step": 3744 }, { "epoch": 0.5, "grad_norm": 0.3125, "learning_rate": 4.8988679179622406e-05, "loss": 2.2746, "step": 3745 }, { "epoch": 0.5, "grad_norm": 0.2890625, "learning_rate": 4.898808315977171e-05, "loss": 2.2591, "step": 3746 }, { "epoch": 0.5, "grad_norm": 0.283203125, "learning_rate": 4.898748696796911e-05, "loss": 2.2801, "step": 3747 }, { "epoch": 0.5, "grad_norm": 0.298828125, "learning_rate": 4.8986890604218874e-05, "loss": 2.2883, "step": 3748 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.8986294068525276e-05, "loss": 2.2535, "step": 3749 }, { "epoch": 0.5, "grad_norm": 0.283203125, "learning_rate": 4.898569736089258e-05, "loss": 2.2973, "step": 3750 }, { "epoch": 0.5, "grad_norm": 0.28125, "learning_rate": 4.8985100481325076e-05, "loss": 2.2555, "step": 3751 }, { "epoch": 0.5, "grad_norm": 0.2890625, "learning_rate": 4.8984503429827045e-05, "loss": 2.3019, "step": 3752 }, { "epoch": 0.5, "grad_norm": 0.287109375, "learning_rate": 4.898390620640275e-05, "loss": 2.2902, "step": 3753 }, { "epoch": 0.5, "grad_norm": 0.310546875, "learning_rate": 4.8983308811056494e-05, "loss": 2.3127, "step": 3754 }, { "epoch": 0.5, "grad_norm": 0.27734375, "learning_rate": 4.898271124379255e-05, "loss": 2.2739, "step": 3755 }, { "epoch": 0.5, "grad_norm": 0.306640625, "learning_rate": 4.89821135046152e-05, "loss": 2.2728, "step": 3756 }, { "epoch": 0.5, "grad_norm": 0.30859375, "learning_rate": 4.8981515593528725e-05, "loss": 2.3018, "step": 3757 }, { "epoch": 0.5, "grad_norm": 0.310546875, "learning_rate": 4.898091751053742e-05, "loss": 2.2692, "step": 3758 }, { "epoch": 0.5, "grad_norm": 0.302734375, "learning_rate": 4.898031925564556e-05, "loss": 2.2603, "step": 3759 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.897972082885745e-05, "loss": 2.2783, "step": 3760 }, { "epoch": 0.5, "grad_norm": 0.2890625, "learning_rate": 4.897912223017737e-05, "loss": 2.2804, "step": 3761 }, { "epoch": 0.5, "grad_norm": 0.291015625, "learning_rate": 4.8978523459609606e-05, "loss": 2.2426, "step": 3762 }, { "epoch": 0.5, "grad_norm": 0.30859375, "learning_rate": 4.8977924517158456e-05, "loss": 2.2629, "step": 3763 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.8977325402828213e-05, "loss": 2.2761, "step": 3764 }, { "epoch": 0.5, "grad_norm": 0.3203125, "learning_rate": 4.8976726116623175e-05, "loss": 2.2815, "step": 3765 }, { "epoch": 0.5, "grad_norm": 0.296875, "learning_rate": 4.897612665854763e-05, "loss": 2.2581, "step": 3766 }, { "epoch": 0.5, "grad_norm": 0.29296875, "learning_rate": 4.897552702860588e-05, "loss": 2.2843, "step": 3767 }, { "epoch": 0.5, "grad_norm": 0.3125, "learning_rate": 4.897492722680223e-05, "loss": 2.275, "step": 3768 }, { "epoch": 0.5, "grad_norm": 0.287109375, "learning_rate": 4.897432725314097e-05, "loss": 2.2359, "step": 3769 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.89737271076264e-05, "loss": 2.282, "step": 3770 }, { "epoch": 0.5, "grad_norm": 0.291015625, "learning_rate": 4.8973126790262825e-05, "loss": 2.2844, "step": 3771 }, { "epoch": 0.5, "grad_norm": 0.29296875, "learning_rate": 4.897252630105455e-05, "loss": 2.2667, "step": 3772 }, { "epoch": 0.5, "grad_norm": 0.302734375, "learning_rate": 4.897192564000587e-05, "loss": 2.2531, "step": 3773 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.89713248071211e-05, "loss": 2.2449, "step": 3774 }, { "epoch": 0.5, "grad_norm": 0.296875, "learning_rate": 4.897072380240455e-05, "loss": 2.2942, "step": 3775 }, { "epoch": 0.5, "grad_norm": 0.302734375, "learning_rate": 4.897012262586052e-05, "loss": 2.2691, "step": 3776 }, { "epoch": 0.5, "grad_norm": 0.314453125, "learning_rate": 4.896952127749334e-05, "loss": 2.2741, "step": 3777 }, { "epoch": 0.5, "grad_norm": 0.296875, "learning_rate": 4.896891975730728e-05, "loss": 2.2596, "step": 3778 }, { "epoch": 0.5, "grad_norm": 0.283203125, "learning_rate": 4.896831806530669e-05, "loss": 2.2648, "step": 3779 }, { "epoch": 0.5, "grad_norm": 0.275390625, "learning_rate": 4.8967716201495856e-05, "loss": 2.2514, "step": 3780 }, { "epoch": 0.5, "grad_norm": 0.294921875, "learning_rate": 4.896711416587911e-05, "loss": 2.2682, "step": 3781 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.896651195846076e-05, "loss": 2.2663, "step": 3782 }, { "epoch": 0.5, "grad_norm": 0.2734375, "learning_rate": 4.896590957924513e-05, "loss": 2.2552, "step": 3783 }, { "epoch": 0.5, "grad_norm": 0.30078125, "learning_rate": 4.896530702823653e-05, "loss": 2.3091, "step": 3784 }, { "epoch": 0.5, "grad_norm": 0.28515625, "learning_rate": 4.896470430543928e-05, "loss": 2.26, "step": 3785 }, { "epoch": 0.51, "grad_norm": 0.291015625, "learning_rate": 4.89641014108577e-05, "loss": 2.2842, "step": 3786 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.896349834449612e-05, "loss": 2.2867, "step": 3787 }, { "epoch": 0.51, "grad_norm": 0.275390625, "learning_rate": 4.8962895106358856e-05, "loss": 2.2677, "step": 3788 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.896229169645023e-05, "loss": 2.2768, "step": 3789 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.896168811477457e-05, "loss": 2.2678, "step": 3790 }, { "epoch": 0.51, "grad_norm": 0.275390625, "learning_rate": 4.896108436133621e-05, "loss": 2.289, "step": 3791 }, { "epoch": 0.51, "grad_norm": 0.310546875, "learning_rate": 4.8960480436139465e-05, "loss": 2.3065, "step": 3792 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.895987633918867e-05, "loss": 2.2727, "step": 3793 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.895927207048816e-05, "loss": 2.2583, "step": 3794 }, { "epoch": 0.51, "grad_norm": 0.287109375, "learning_rate": 4.8958667630042255e-05, "loss": 2.2738, "step": 3795 }, { "epoch": 0.51, "grad_norm": 0.291015625, "learning_rate": 4.89580630178553e-05, "loss": 2.2735, "step": 3796 }, { "epoch": 0.51, "grad_norm": 0.294921875, "learning_rate": 4.895745823393162e-05, "loss": 2.2789, "step": 3797 }, { "epoch": 0.51, "grad_norm": 0.296875, "learning_rate": 4.895685327827556e-05, "loss": 2.2759, "step": 3798 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.895624815089144e-05, "loss": 2.2777, "step": 3799 }, { "epoch": 0.51, "grad_norm": 0.298828125, "learning_rate": 4.895564285178362e-05, "loss": 2.2838, "step": 3800 }, { "epoch": 0.51, "grad_norm": 0.302734375, "learning_rate": 4.895503738095641e-05, "loss": 2.2555, "step": 3801 }, { "epoch": 0.51, "grad_norm": 0.298828125, "learning_rate": 4.8954431738414184e-05, "loss": 2.2437, "step": 3802 }, { "epoch": 0.51, "grad_norm": 0.310546875, "learning_rate": 4.895382592416126e-05, "loss": 2.2853, "step": 3803 }, { "epoch": 0.51, "grad_norm": 0.322265625, "learning_rate": 4.8953219938201986e-05, "loss": 2.2696, "step": 3804 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.895261378054071e-05, "loss": 2.2722, "step": 3805 }, { "epoch": 0.51, "grad_norm": 0.30859375, "learning_rate": 4.8952007451181777e-05, "loss": 2.2587, "step": 3806 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.895140095012952e-05, "loss": 2.2542, "step": 3807 }, { "epoch": 0.51, "grad_norm": 0.310546875, "learning_rate": 4.8950794277388304e-05, "loss": 2.2521, "step": 3808 }, { "epoch": 0.51, "grad_norm": 0.294921875, "learning_rate": 4.895018743296247e-05, "loss": 2.2356, "step": 3809 }, { "epoch": 0.51, "grad_norm": 0.27734375, "learning_rate": 4.8949580416856364e-05, "loss": 2.2934, "step": 3810 }, { "epoch": 0.51, "grad_norm": 0.31640625, "learning_rate": 4.894897322907435e-05, "loss": 2.2694, "step": 3811 }, { "epoch": 0.51, "grad_norm": 0.298828125, "learning_rate": 4.8948365869620766e-05, "loss": 2.291, "step": 3812 }, { "epoch": 0.51, "grad_norm": 0.3046875, "learning_rate": 4.894775833849997e-05, "loss": 2.2407, "step": 3813 }, { "epoch": 0.51, "grad_norm": 0.310546875, "learning_rate": 4.894715063571632e-05, "loss": 2.2461, "step": 3814 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.894654276127418e-05, "loss": 2.2779, "step": 3815 }, { "epoch": 0.51, "grad_norm": 0.287109375, "learning_rate": 4.894593471517789e-05, "loss": 2.2549, "step": 3816 }, { "epoch": 0.51, "grad_norm": 0.32421875, "learning_rate": 4.894532649743182e-05, "loss": 2.2694, "step": 3817 }, { "epoch": 0.51, "grad_norm": 0.28515625, "learning_rate": 4.894471810804032e-05, "loss": 2.2559, "step": 3818 }, { "epoch": 0.51, "grad_norm": 0.302734375, "learning_rate": 4.894410954700777e-05, "loss": 2.2527, "step": 3819 }, { "epoch": 0.51, "grad_norm": 0.287109375, "learning_rate": 4.894350081433851e-05, "loss": 2.2436, "step": 3820 }, { "epoch": 0.51, "grad_norm": 0.3046875, "learning_rate": 4.8942891910036915e-05, "loss": 2.3, "step": 3821 }, { "epoch": 0.51, "grad_norm": 0.302734375, "learning_rate": 4.894228283410736e-05, "loss": 2.2928, "step": 3822 }, { "epoch": 0.51, "grad_norm": 0.302734375, "learning_rate": 4.894167358655418e-05, "loss": 2.2731, "step": 3823 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.8941064167381774e-05, "loss": 2.2339, "step": 3824 }, { "epoch": 0.51, "grad_norm": 0.283203125, "learning_rate": 4.8940454576594495e-05, "loss": 2.2814, "step": 3825 }, { "epoch": 0.51, "grad_norm": 0.32421875, "learning_rate": 4.8939844814196714e-05, "loss": 2.271, "step": 3826 }, { "epoch": 0.51, "grad_norm": 0.3125, "learning_rate": 4.893923488019281e-05, "loss": 2.304, "step": 3827 }, { "epoch": 0.51, "grad_norm": 0.296875, "learning_rate": 4.8938624774587146e-05, "loss": 2.2684, "step": 3828 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.89380144973841e-05, "loss": 2.2509, "step": 3829 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.893740404858804e-05, "loss": 2.2533, "step": 3830 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.893679342820335e-05, "loss": 2.2666, "step": 3831 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.8936182636234404e-05, "loss": 2.3012, "step": 3832 }, { "epoch": 0.51, "grad_norm": 0.287109375, "learning_rate": 4.8935571672685574e-05, "loss": 2.2824, "step": 3833 }, { "epoch": 0.51, "grad_norm": 0.287109375, "learning_rate": 4.8934960537561256e-05, "loss": 2.2665, "step": 3834 }, { "epoch": 0.51, "grad_norm": 0.291015625, "learning_rate": 4.8934349230865813e-05, "loss": 2.2777, "step": 3835 }, { "epoch": 0.51, "grad_norm": 0.283203125, "learning_rate": 4.893373775260364e-05, "loss": 2.2773, "step": 3836 }, { "epoch": 0.51, "grad_norm": 0.275390625, "learning_rate": 4.8933126102779105e-05, "loss": 2.3045, "step": 3837 }, { "epoch": 0.51, "grad_norm": 0.28125, "learning_rate": 4.893251428139661e-05, "loss": 2.2596, "step": 3838 }, { "epoch": 0.51, "grad_norm": 0.30859375, "learning_rate": 4.893190228846053e-05, "loss": 2.2388, "step": 3839 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.893129012397525e-05, "loss": 2.2665, "step": 3840 }, { "epoch": 0.51, "grad_norm": 0.3203125, "learning_rate": 4.893067778794517e-05, "loss": 2.2454, "step": 3841 }, { "epoch": 0.51, "grad_norm": 0.279296875, "learning_rate": 4.8930065280374674e-05, "loss": 2.2768, "step": 3842 }, { "epoch": 0.51, "grad_norm": 0.279296875, "learning_rate": 4.892945260126814e-05, "loss": 2.2802, "step": 3843 }, { "epoch": 0.51, "grad_norm": 0.302734375, "learning_rate": 4.892883975062997e-05, "loss": 2.2686, "step": 3844 }, { "epoch": 0.51, "grad_norm": 0.2734375, "learning_rate": 4.892822672846457e-05, "loss": 2.2722, "step": 3845 }, { "epoch": 0.51, "grad_norm": 0.296875, "learning_rate": 4.8927613534776316e-05, "loss": 2.2756, "step": 3846 }, { "epoch": 0.51, "grad_norm": 0.3125, "learning_rate": 4.892700016956961e-05, "loss": 2.3174, "step": 3847 }, { "epoch": 0.51, "grad_norm": 0.29296875, "learning_rate": 4.892638663284885e-05, "loss": 2.2888, "step": 3848 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.892577292461843e-05, "loss": 2.3037, "step": 3849 }, { "epoch": 0.51, "grad_norm": 0.298828125, "learning_rate": 4.892515904488274e-05, "loss": 2.3051, "step": 3850 }, { "epoch": 0.51, "grad_norm": 0.306640625, "learning_rate": 4.89245449936462e-05, "loss": 2.2529, "step": 3851 }, { "epoch": 0.51, "grad_norm": 0.296875, "learning_rate": 4.892393077091321e-05, "loss": 2.2693, "step": 3852 }, { "epoch": 0.51, "grad_norm": 0.296875, "learning_rate": 4.8923316376688166e-05, "loss": 2.2819, "step": 3853 }, { "epoch": 0.51, "grad_norm": 0.275390625, "learning_rate": 4.892270181097547e-05, "loss": 2.3159, "step": 3854 }, { "epoch": 0.51, "grad_norm": 0.310546875, "learning_rate": 4.892208707377952e-05, "loss": 2.2744, "step": 3855 }, { "epoch": 0.51, "grad_norm": 0.291015625, "learning_rate": 4.8921472165104745e-05, "loss": 2.281, "step": 3856 }, { "epoch": 0.51, "grad_norm": 0.28125, "learning_rate": 4.892085708495553e-05, "loss": 2.2338, "step": 3857 }, { "epoch": 0.51, "grad_norm": 0.2890625, "learning_rate": 4.89202418333363e-05, "loss": 2.3027, "step": 3858 }, { "epoch": 0.51, "grad_norm": 0.279296875, "learning_rate": 4.891962641025146e-05, "loss": 2.2697, "step": 3859 }, { "epoch": 0.51, "grad_norm": 0.3125, "learning_rate": 4.891901081570541e-05, "loss": 2.2263, "step": 3860 }, { "epoch": 0.52, "grad_norm": 0.287109375, "learning_rate": 4.891839504970259e-05, "loss": 2.2889, "step": 3861 }, { "epoch": 0.52, "grad_norm": 0.2890625, "learning_rate": 4.891777911224739e-05, "loss": 2.2617, "step": 3862 }, { "epoch": 0.52, "grad_norm": 0.2734375, "learning_rate": 4.891716300334423e-05, "loss": 2.2761, "step": 3863 }, { "epoch": 0.52, "grad_norm": 0.330078125, "learning_rate": 4.8916546722997526e-05, "loss": 2.2721, "step": 3864 }, { "epoch": 0.52, "grad_norm": 0.298828125, "learning_rate": 4.891593027121171e-05, "loss": 2.2899, "step": 3865 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.891531364799118e-05, "loss": 2.2682, "step": 3866 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.891469685334037e-05, "loss": 2.2836, "step": 3867 }, { "epoch": 0.52, "grad_norm": 0.2890625, "learning_rate": 4.8914079887263694e-05, "loss": 2.2631, "step": 3868 }, { "epoch": 0.52, "grad_norm": 0.275390625, "learning_rate": 4.8913462749765573e-05, "loss": 2.2697, "step": 3869 }, { "epoch": 0.52, "grad_norm": 0.298828125, "learning_rate": 4.891284544085044e-05, "loss": 2.2474, "step": 3870 }, { "epoch": 0.52, "grad_norm": 0.30078125, "learning_rate": 4.891222796052272e-05, "loss": 2.2217, "step": 3871 }, { "epoch": 0.52, "grad_norm": 0.291015625, "learning_rate": 4.891161030878683e-05, "loss": 2.2887, "step": 3872 }, { "epoch": 0.52, "grad_norm": 0.298828125, "learning_rate": 4.8910992485647206e-05, "loss": 2.264, "step": 3873 }, { "epoch": 0.52, "grad_norm": 0.298828125, "learning_rate": 4.891037449110827e-05, "loss": 2.2823, "step": 3874 }, { "epoch": 0.52, "grad_norm": 0.294921875, "learning_rate": 4.890975632517445e-05, "loss": 2.3137, "step": 3875 }, { "epoch": 0.52, "grad_norm": 0.310546875, "learning_rate": 4.8909137987850186e-05, "loss": 2.2707, "step": 3876 }, { "epoch": 0.52, "grad_norm": 0.294921875, "learning_rate": 4.890851947913991e-05, "loss": 2.2746, "step": 3877 }, { "epoch": 0.52, "grad_norm": 0.28125, "learning_rate": 4.890790079904804e-05, "loss": 2.2745, "step": 3878 }, { "epoch": 0.52, "grad_norm": 0.306640625, "learning_rate": 4.890728194757904e-05, "loss": 2.237, "step": 3879 }, { "epoch": 0.52, "grad_norm": 0.318359375, "learning_rate": 4.890666292473731e-05, "loss": 2.2675, "step": 3880 }, { "epoch": 0.52, "grad_norm": 0.31640625, "learning_rate": 4.890604373052732e-05, "loss": 2.2688, "step": 3881 }, { "epoch": 0.52, "grad_norm": 0.291015625, "learning_rate": 4.890542436495349e-05, "loss": 2.2688, "step": 3882 }, { "epoch": 0.52, "grad_norm": 0.279296875, "learning_rate": 4.890480482802026e-05, "loss": 2.2715, "step": 3883 }, { "epoch": 0.52, "grad_norm": 0.287109375, "learning_rate": 4.890418511973208e-05, "loss": 2.257, "step": 3884 }, { "epoch": 0.52, "grad_norm": 0.291015625, "learning_rate": 4.890356524009339e-05, "loss": 2.2813, "step": 3885 }, { "epoch": 0.52, "grad_norm": 0.30859375, "learning_rate": 4.8902945189108615e-05, "loss": 2.2651, "step": 3886 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.890232496678223e-05, "loss": 2.2505, "step": 3887 }, { "epoch": 0.52, "grad_norm": 0.326171875, "learning_rate": 4.8901704573118664e-05, "loss": 2.3132, "step": 3888 }, { "epoch": 0.52, "grad_norm": 0.2890625, "learning_rate": 4.890108400812237e-05, "loss": 2.2594, "step": 3889 }, { "epoch": 0.52, "grad_norm": 0.302734375, "learning_rate": 4.8900463271797785e-05, "loss": 2.251, "step": 3890 }, { "epoch": 0.52, "grad_norm": 0.318359375, "learning_rate": 4.8899842364149375e-05, "loss": 2.2301, "step": 3891 }, { "epoch": 0.52, "grad_norm": 0.33203125, "learning_rate": 4.889922128518157e-05, "loss": 2.2824, "step": 3892 }, { "epoch": 0.52, "grad_norm": 0.3125, "learning_rate": 4.889860003489885e-05, "loss": 2.3019, "step": 3893 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.889797861330564e-05, "loss": 2.2834, "step": 3894 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.889735702040641e-05, "loss": 2.2951, "step": 3895 }, { "epoch": 0.52, "grad_norm": 0.298828125, "learning_rate": 4.889673525620562e-05, "loss": 2.2845, "step": 3896 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.8896113320707704e-05, "loss": 2.2842, "step": 3897 }, { "epoch": 0.52, "grad_norm": 0.302734375, "learning_rate": 4.889549121391714e-05, "loss": 2.2704, "step": 3898 }, { "epoch": 0.52, "grad_norm": 0.291015625, "learning_rate": 4.889486893583839e-05, "loss": 2.2434, "step": 3899 }, { "epoch": 0.52, "grad_norm": 0.294921875, "learning_rate": 4.8894246486475914e-05, "loss": 2.2495, "step": 3900 }, { "epoch": 0.52, "grad_norm": 0.302734375, "learning_rate": 4.8893623865834157e-05, "loss": 2.2953, "step": 3901 }, { "epoch": 0.52, "grad_norm": 0.287109375, "learning_rate": 4.88930010739176e-05, "loss": 2.2565, "step": 3902 }, { "epoch": 0.52, "grad_norm": 0.29296875, "learning_rate": 4.889237811073069e-05, "loss": 2.2856, "step": 3903 }, { "epoch": 0.52, "grad_norm": 0.302734375, "learning_rate": 4.889175497627791e-05, "loss": 2.2686, "step": 3904 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.889113167056372e-05, "loss": 2.3159, "step": 3905 }, { "epoch": 0.52, "grad_norm": 0.310546875, "learning_rate": 4.889050819359258e-05, "loss": 2.2829, "step": 3906 }, { "epoch": 0.52, "grad_norm": 0.283203125, "learning_rate": 4.888988454536898e-05, "loss": 2.2627, "step": 3907 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.888926072589736e-05, "loss": 2.242, "step": 3908 }, { "epoch": 0.52, "grad_norm": 0.318359375, "learning_rate": 4.888863673518222e-05, "loss": 2.2808, "step": 3909 }, { "epoch": 0.52, "grad_norm": 0.2890625, "learning_rate": 4.888801257322801e-05, "loss": 2.2676, "step": 3910 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.888738824003923e-05, "loss": 2.2572, "step": 3911 }, { "epoch": 0.52, "grad_norm": 0.3046875, "learning_rate": 4.888676373562034e-05, "loss": 2.2794, "step": 3912 }, { "epoch": 0.52, "grad_norm": 0.30859375, "learning_rate": 4.8886139059975814e-05, "loss": 2.2755, "step": 3913 }, { "epoch": 0.52, "grad_norm": 0.310546875, "learning_rate": 4.888551421311013e-05, "loss": 2.2651, "step": 3914 }, { "epoch": 0.52, "grad_norm": 0.302734375, "learning_rate": 4.888488919502777e-05, "loss": 2.2553, "step": 3915 }, { "epoch": 0.52, "grad_norm": 0.2890625, "learning_rate": 4.8884264005733224e-05, "loss": 2.2834, "step": 3916 }, { "epoch": 0.52, "grad_norm": 0.29296875, "learning_rate": 4.888363864523096e-05, "loss": 2.277, "step": 3917 }, { "epoch": 0.52, "grad_norm": 0.29296875, "learning_rate": 4.888301311352547e-05, "loss": 2.2685, "step": 3918 }, { "epoch": 0.52, "grad_norm": 0.2734375, "learning_rate": 4.888238741062122e-05, "loss": 2.2794, "step": 3919 }, { "epoch": 0.52, "grad_norm": 0.314453125, "learning_rate": 4.888176153652272e-05, "loss": 2.2734, "step": 3920 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.888113549123443e-05, "loss": 2.2601, "step": 3921 }, { "epoch": 0.52, "grad_norm": 0.314453125, "learning_rate": 4.8880509274760864e-05, "loss": 2.2845, "step": 3922 }, { "epoch": 0.52, "grad_norm": 0.314453125, "learning_rate": 4.88798828871065e-05, "loss": 2.286, "step": 3923 }, { "epoch": 0.52, "grad_norm": 0.291015625, "learning_rate": 4.887925632827582e-05, "loss": 2.2987, "step": 3924 }, { "epoch": 0.52, "grad_norm": 0.32421875, "learning_rate": 4.8878629598273326e-05, "loss": 2.2558, "step": 3925 }, { "epoch": 0.52, "grad_norm": 0.283203125, "learning_rate": 4.887800269710351e-05, "loss": 2.2756, "step": 3926 }, { "epoch": 0.52, "grad_norm": 0.294921875, "learning_rate": 4.887737562477086e-05, "loss": 2.2701, "step": 3927 }, { "epoch": 0.52, "grad_norm": 0.283203125, "learning_rate": 4.887674838127988e-05, "loss": 2.287, "step": 3928 }, { "epoch": 0.52, "grad_norm": 0.28515625, "learning_rate": 4.8876120966635056e-05, "loss": 2.287, "step": 3929 }, { "epoch": 0.52, "grad_norm": 0.287109375, "learning_rate": 4.8875493380840884e-05, "loss": 2.2744, "step": 3930 }, { "epoch": 0.52, "grad_norm": 0.28515625, "learning_rate": 4.887486562390187e-05, "loss": 2.2826, "step": 3931 }, { "epoch": 0.52, "grad_norm": 0.296875, "learning_rate": 4.887423769582251e-05, "loss": 2.2822, "step": 3932 }, { "epoch": 0.52, "grad_norm": 0.294921875, "learning_rate": 4.8873609596607315e-05, "loss": 2.2645, "step": 3933 }, { "epoch": 0.52, "grad_norm": 0.310546875, "learning_rate": 4.887298132626077e-05, "loss": 2.2619, "step": 3934 }, { "epoch": 0.52, "grad_norm": 0.314453125, "learning_rate": 4.88723528847874e-05, "loss": 2.2753, "step": 3935 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.887172427219169e-05, "loss": 2.2926, "step": 3936 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.887109548847816e-05, "loss": 2.2799, "step": 3937 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.88704665336513e-05, "loss": 2.2804, "step": 3938 }, { "epoch": 0.53, "grad_norm": 0.287109375, "learning_rate": 4.8869837407715635e-05, "loss": 2.2847, "step": 3939 }, { "epoch": 0.53, "grad_norm": 0.29296875, "learning_rate": 4.8869208110675674e-05, "loss": 2.2739, "step": 3940 }, { "epoch": 0.53, "grad_norm": 0.294921875, "learning_rate": 4.8868578642535926e-05, "loss": 2.2732, "step": 3941 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.886794900330089e-05, "loss": 2.2995, "step": 3942 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.886731919297509e-05, "loss": 2.2807, "step": 3943 }, { "epoch": 0.53, "grad_norm": 0.30859375, "learning_rate": 4.886668921156305e-05, "loss": 2.2788, "step": 3944 }, { "epoch": 0.53, "grad_norm": 0.283203125, "learning_rate": 4.886605905906927e-05, "loss": 2.2767, "step": 3945 }, { "epoch": 0.53, "grad_norm": 0.291015625, "learning_rate": 4.8865428735498276e-05, "loss": 2.2441, "step": 3946 }, { "epoch": 0.53, "grad_norm": 0.306640625, "learning_rate": 4.886479824085458e-05, "loss": 2.2694, "step": 3947 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.8864167575142705e-05, "loss": 2.2789, "step": 3948 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.8863536738367174e-05, "loss": 2.2509, "step": 3949 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.886290573053251e-05, "loss": 2.2931, "step": 3950 }, { "epoch": 0.53, "grad_norm": 0.310546875, "learning_rate": 4.8862274551643235e-05, "loss": 2.2766, "step": 3951 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.8861643201703865e-05, "loss": 2.2622, "step": 3952 }, { "epoch": 0.53, "grad_norm": 0.291015625, "learning_rate": 4.886101168071894e-05, "loss": 2.3057, "step": 3953 }, { "epoch": 0.53, "grad_norm": 0.28515625, "learning_rate": 4.886037998869297e-05, "loss": 2.2865, "step": 3954 }, { "epoch": 0.53, "grad_norm": 0.29296875, "learning_rate": 4.88597481256305e-05, "loss": 2.2825, "step": 3955 }, { "epoch": 0.53, "grad_norm": 0.294921875, "learning_rate": 4.885911609153604e-05, "loss": 2.2465, "step": 3956 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.885848388641414e-05, "loss": 2.2814, "step": 3957 }, { "epoch": 0.53, "grad_norm": 0.291015625, "learning_rate": 4.885785151026933e-05, "loss": 2.2787, "step": 3958 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.885721896310613e-05, "loss": 2.2801, "step": 3959 }, { "epoch": 0.53, "grad_norm": 0.326171875, "learning_rate": 4.885658624492908e-05, "loss": 2.3034, "step": 3960 }, { "epoch": 0.53, "grad_norm": 0.32421875, "learning_rate": 4.8855953355742724e-05, "loss": 2.2866, "step": 3961 }, { "epoch": 0.53, "grad_norm": 0.314453125, "learning_rate": 4.885532029555158e-05, "loss": 2.2783, "step": 3962 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.88546870643602e-05, "loss": 2.2851, "step": 3963 }, { "epoch": 0.53, "grad_norm": 0.29296875, "learning_rate": 4.8854053662173124e-05, "loss": 2.2793, "step": 3964 }, { "epoch": 0.53, "grad_norm": 0.287109375, "learning_rate": 4.8853420088994896e-05, "loss": 2.2508, "step": 3965 }, { "epoch": 0.53, "grad_norm": 0.31640625, "learning_rate": 4.8852786344830036e-05, "loss": 2.273, "step": 3966 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.885215242968311e-05, "loss": 2.3066, "step": 3967 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.885151834355866e-05, "loss": 2.2734, "step": 3968 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.885088408646121e-05, "loss": 2.2972, "step": 3969 }, { "epoch": 0.53, "grad_norm": 0.29296875, "learning_rate": 4.885024965839533e-05, "loss": 2.3003, "step": 3970 }, { "epoch": 0.53, "grad_norm": 0.318359375, "learning_rate": 4.884961505936555e-05, "loss": 2.2797, "step": 3971 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.884898028937643e-05, "loss": 2.2641, "step": 3972 }, { "epoch": 0.53, "grad_norm": 0.28515625, "learning_rate": 4.8848345348432526e-05, "loss": 2.2786, "step": 3973 }, { "epoch": 0.53, "grad_norm": 0.306640625, "learning_rate": 4.884771023653838e-05, "loss": 2.2382, "step": 3974 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.884707495369854e-05, "loss": 2.3004, "step": 3975 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.8846439499917565e-05, "loss": 2.2707, "step": 3976 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.884580387520002e-05, "loss": 2.2713, "step": 3977 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.884516807955044e-05, "loss": 2.2789, "step": 3978 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.884453211297341e-05, "loss": 2.2908, "step": 3979 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.884389597547346e-05, "loss": 2.2699, "step": 3980 }, { "epoch": 0.53, "grad_norm": 0.31640625, "learning_rate": 4.8843259667055165e-05, "loss": 2.2722, "step": 3981 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.884262318772309e-05, "loss": 2.2689, "step": 3982 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.884198653748179e-05, "loss": 2.2774, "step": 3983 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.884134971633583e-05, "loss": 2.2881, "step": 3984 }, { "epoch": 0.53, "grad_norm": 0.287109375, "learning_rate": 4.884071272428977e-05, "loss": 2.231, "step": 3985 }, { "epoch": 0.53, "grad_norm": 0.291015625, "learning_rate": 4.884007556134819e-05, "loss": 2.2948, "step": 3986 }, { "epoch": 0.53, "grad_norm": 0.279296875, "learning_rate": 4.883943822751564e-05, "loss": 2.2586, "step": 3987 }, { "epoch": 0.53, "grad_norm": 0.31640625, "learning_rate": 4.8838800722796705e-05, "loss": 2.253, "step": 3988 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.883816304719595e-05, "loss": 2.273, "step": 3989 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.883752520071794e-05, "loss": 2.2603, "step": 3990 }, { "epoch": 0.53, "grad_norm": 0.28125, "learning_rate": 4.883688718336724e-05, "loss": 2.2599, "step": 3991 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.8836248995148445e-05, "loss": 2.2931, "step": 3992 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.883561063606612e-05, "loss": 2.2779, "step": 3993 }, { "epoch": 0.53, "grad_norm": 0.279296875, "learning_rate": 4.883497210612483e-05, "loss": 2.2932, "step": 3994 }, { "epoch": 0.53, "grad_norm": 0.283203125, "learning_rate": 4.8834333405329177e-05, "loss": 2.2378, "step": 3995 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.883369453368371e-05, "loss": 2.2743, "step": 3996 }, { "epoch": 0.53, "grad_norm": 0.30078125, "learning_rate": 4.8833055491193026e-05, "loss": 2.2782, "step": 3997 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.8832416277861705e-05, "loss": 2.2664, "step": 3998 }, { "epoch": 0.53, "grad_norm": 0.2890625, "learning_rate": 4.883177689369433e-05, "loss": 2.2906, "step": 3999 }, { "epoch": 0.53, "grad_norm": 0.296875, "learning_rate": 4.883113733869547e-05, "loss": 2.2777, "step": 4000 }, { "epoch": 0.53, "eval_loss": 2.2694790363311768, "eval_runtime": 621.0916, "eval_samples_per_second": 62.424, "eval_steps_per_second": 7.804, "step": 4000 }, { "epoch": 0.53, "grad_norm": 0.2734375, "learning_rate": 4.883049761286973e-05, "loss": 2.3112, "step": 4001 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.882985771622168e-05, "loss": 2.2782, "step": 4002 }, { "epoch": 0.53, "grad_norm": 0.298828125, "learning_rate": 4.882921764875591e-05, "loss": 2.2514, "step": 4003 }, { "epoch": 0.53, "grad_norm": 0.3203125, "learning_rate": 4.882857741047702e-05, "loss": 2.2745, "step": 4004 }, { "epoch": 0.53, "grad_norm": 0.3046875, "learning_rate": 4.882793700138958e-05, "loss": 2.2778, "step": 4005 }, { "epoch": 0.53, "grad_norm": 0.302734375, "learning_rate": 4.8827296421498206e-05, "loss": 2.2736, "step": 4006 }, { "epoch": 0.53, "grad_norm": 0.294921875, "learning_rate": 4.882665567080746e-05, "loss": 2.2684, "step": 4007 }, { "epoch": 0.53, "grad_norm": 0.294921875, "learning_rate": 4.882601474932195e-05, "loss": 2.2842, "step": 4008 }, { "epoch": 0.53, "grad_norm": 0.294921875, "learning_rate": 4.882537365704628e-05, "loss": 2.2684, "step": 4009 }, { "epoch": 0.53, "grad_norm": 0.30859375, "learning_rate": 4.882473239398503e-05, "loss": 2.2723, "step": 4010 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.88240909601428e-05, "loss": 2.2833, "step": 4011 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.882344935552419e-05, "loss": 2.254, "step": 4012 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.88228075801338e-05, "loss": 2.2774, "step": 4013 }, { "epoch": 0.54, "grad_norm": 0.279296875, "learning_rate": 4.8822165633976235e-05, "loss": 2.282, "step": 4014 }, { "epoch": 0.54, "grad_norm": 0.3125, "learning_rate": 4.882152351705609e-05, "loss": 2.2691, "step": 4015 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.882088122937796e-05, "loss": 2.2642, "step": 4016 }, { "epoch": 0.54, "grad_norm": 0.29296875, "learning_rate": 4.882023877094647e-05, "loss": 2.2629, "step": 4017 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.88195961417662e-05, "loss": 2.2938, "step": 4018 }, { "epoch": 0.54, "grad_norm": 0.3046875, "learning_rate": 4.8818953341841775e-05, "loss": 2.2631, "step": 4019 }, { "epoch": 0.54, "grad_norm": 0.32421875, "learning_rate": 4.881831037117781e-05, "loss": 2.2703, "step": 4020 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.8817667229778885e-05, "loss": 2.2936, "step": 4021 }, { "epoch": 0.54, "grad_norm": 0.330078125, "learning_rate": 4.8817023917649644e-05, "loss": 2.2569, "step": 4022 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.8816380434794664e-05, "loss": 2.2597, "step": 4023 }, { "epoch": 0.54, "grad_norm": 0.287109375, "learning_rate": 4.881573678121858e-05, "loss": 2.2705, "step": 4024 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.8815092956926e-05, "loss": 2.2686, "step": 4025 }, { "epoch": 0.54, "grad_norm": 0.28515625, "learning_rate": 4.881444896192155e-05, "loss": 2.2379, "step": 4026 }, { "epoch": 0.54, "grad_norm": 0.310546875, "learning_rate": 4.8813804796209826e-05, "loss": 2.2848, "step": 4027 }, { "epoch": 0.54, "grad_norm": 0.3046875, "learning_rate": 4.8813160459795457e-05, "loss": 2.2533, "step": 4028 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.8812515952683064e-05, "loss": 2.2808, "step": 4029 }, { "epoch": 0.54, "grad_norm": 0.306640625, "learning_rate": 4.881187127487726e-05, "loss": 2.2633, "step": 4030 }, { "epoch": 0.54, "grad_norm": 0.294921875, "learning_rate": 4.881122642638267e-05, "loss": 2.2577, "step": 4031 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.881058140720392e-05, "loss": 2.2697, "step": 4032 }, { "epoch": 0.54, "grad_norm": 0.30859375, "learning_rate": 4.880993621734562e-05, "loss": 2.2519, "step": 4033 }, { "epoch": 0.54, "grad_norm": 0.326171875, "learning_rate": 4.880929085681242e-05, "loss": 2.259, "step": 4034 }, { "epoch": 0.54, "grad_norm": 0.279296875, "learning_rate": 4.880864532560891e-05, "loss": 2.2632, "step": 4035 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.8807999623739756e-05, "loss": 2.2675, "step": 4036 }, { "epoch": 0.54, "grad_norm": 0.2890625, "learning_rate": 4.880735375120956e-05, "loss": 2.2844, "step": 4037 }, { "epoch": 0.54, "grad_norm": 0.302734375, "learning_rate": 4.8806707708022956e-05, "loss": 2.2602, "step": 4038 }, { "epoch": 0.54, "grad_norm": 0.28125, "learning_rate": 4.880606149418459e-05, "loss": 2.2567, "step": 4039 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.8805415109699074e-05, "loss": 2.2968, "step": 4040 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.8804768554571054e-05, "loss": 2.2387, "step": 4041 }, { "epoch": 0.54, "grad_norm": 0.2890625, "learning_rate": 4.8804121828805164e-05, "loss": 2.303, "step": 4042 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.880347493240603e-05, "loss": 2.2371, "step": 4043 }, { "epoch": 0.54, "grad_norm": 0.302734375, "learning_rate": 4.8802827865378306e-05, "loss": 2.2691, "step": 4044 }, { "epoch": 0.54, "grad_norm": 0.296875, "learning_rate": 4.880218062772662e-05, "loss": 2.2513, "step": 4045 }, { "epoch": 0.54, "grad_norm": 0.29296875, "learning_rate": 4.8801533219455605e-05, "loss": 2.2646, "step": 4046 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.8800885640569914e-05, "loss": 2.2629, "step": 4047 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.8800237891074183e-05, "loss": 2.2609, "step": 4048 }, { "epoch": 0.54, "grad_norm": 0.287109375, "learning_rate": 4.879958997097306e-05, "loss": 2.2708, "step": 4049 }, { "epoch": 0.54, "grad_norm": 0.28515625, "learning_rate": 4.8798941880271174e-05, "loss": 2.2856, "step": 4050 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.879829361897319e-05, "loss": 2.3044, "step": 4051 }, { "epoch": 0.54, "grad_norm": 0.294921875, "learning_rate": 4.879764518708375e-05, "loss": 2.2552, "step": 4052 }, { "epoch": 0.54, "grad_norm": 0.296875, "learning_rate": 4.879699658460749e-05, "loss": 2.2617, "step": 4053 }, { "epoch": 0.54, "grad_norm": 0.291015625, "learning_rate": 4.8796347811549085e-05, "loss": 2.2664, "step": 4054 }, { "epoch": 0.54, "grad_norm": 0.296875, "learning_rate": 4.8795698867913155e-05, "loss": 2.2651, "step": 4055 }, { "epoch": 0.54, "grad_norm": 0.279296875, "learning_rate": 4.879504975370437e-05, "loss": 2.2328, "step": 4056 }, { "epoch": 0.54, "grad_norm": 0.326171875, "learning_rate": 4.879440046892738e-05, "loss": 2.2948, "step": 4057 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.879375101358683e-05, "loss": 2.2826, "step": 4058 }, { "epoch": 0.54, "grad_norm": 0.29296875, "learning_rate": 4.8793101387687395e-05, "loss": 2.2875, "step": 4059 }, { "epoch": 0.54, "grad_norm": 0.302734375, "learning_rate": 4.879245159123371e-05, "loss": 2.2605, "step": 4060 }, { "epoch": 0.54, "grad_norm": 0.291015625, "learning_rate": 4.8791801624230457e-05, "loss": 2.2852, "step": 4061 }, { "epoch": 0.54, "grad_norm": 0.291015625, "learning_rate": 4.8791151486682276e-05, "loss": 2.2772, "step": 4062 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.879050117859382e-05, "loss": 2.2845, "step": 4063 }, { "epoch": 0.54, "grad_norm": 0.291015625, "learning_rate": 4.8789850699969774e-05, "loss": 2.2421, "step": 4064 }, { "epoch": 0.54, "grad_norm": 0.291015625, "learning_rate": 4.8789200050814784e-05, "loss": 2.2834, "step": 4065 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.878854923113353e-05, "loss": 2.282, "step": 4066 }, { "epoch": 0.54, "grad_norm": 0.294921875, "learning_rate": 4.8787898240930664e-05, "loss": 2.2941, "step": 4067 }, { "epoch": 0.54, "grad_norm": 0.30859375, "learning_rate": 4.878724708021085e-05, "loss": 2.261, "step": 4068 }, { "epoch": 0.54, "grad_norm": 0.28125, "learning_rate": 4.878659574897877e-05, "loss": 2.2515, "step": 4069 }, { "epoch": 0.54, "grad_norm": 0.2734375, "learning_rate": 4.8785944247239075e-05, "loss": 2.2497, "step": 4070 }, { "epoch": 0.54, "grad_norm": 0.275390625, "learning_rate": 4.8785292574996444e-05, "loss": 2.275, "step": 4071 }, { "epoch": 0.54, "grad_norm": 0.29296875, "learning_rate": 4.878464073225556e-05, "loss": 2.2722, "step": 4072 }, { "epoch": 0.54, "grad_norm": 0.287109375, "learning_rate": 4.8783988719021074e-05, "loss": 2.2834, "step": 4073 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.878333653529767e-05, "loss": 2.2561, "step": 4074 }, { "epoch": 0.54, "grad_norm": 0.302734375, "learning_rate": 4.878268418109002e-05, "loss": 2.261, "step": 4075 }, { "epoch": 0.54, "grad_norm": 0.30078125, "learning_rate": 4.8782031656402815e-05, "loss": 2.2775, "step": 4076 }, { "epoch": 0.54, "grad_norm": 0.296875, "learning_rate": 4.878137896124071e-05, "loss": 2.2738, "step": 4077 }, { "epoch": 0.54, "grad_norm": 0.287109375, "learning_rate": 4.87807260956084e-05, "loss": 2.2889, "step": 4078 }, { "epoch": 0.54, "grad_norm": 0.30859375, "learning_rate": 4.878007305951055e-05, "loss": 2.2722, "step": 4079 }, { "epoch": 0.54, "grad_norm": 0.28125, "learning_rate": 4.8779419852951866e-05, "loss": 2.2707, "step": 4080 }, { "epoch": 0.54, "grad_norm": 0.29296875, "learning_rate": 4.8778766475937007e-05, "loss": 2.3051, "step": 4081 }, { "epoch": 0.54, "grad_norm": 0.287109375, "learning_rate": 4.877811292847066e-05, "loss": 2.2374, "step": 4082 }, { "epoch": 0.54, "grad_norm": 0.279296875, "learning_rate": 4.8777459210557516e-05, "loss": 2.2644, "step": 4083 }, { "epoch": 0.54, "grad_norm": 0.298828125, "learning_rate": 4.877680532220226e-05, "loss": 2.2872, "step": 4084 }, { "epoch": 0.54, "grad_norm": 0.283203125, "learning_rate": 4.8776151263409576e-05, "loss": 2.2696, "step": 4085 }, { "epoch": 0.55, "grad_norm": 0.31640625, "learning_rate": 4.877549703418416e-05, "loss": 2.2606, "step": 4086 }, { "epoch": 0.55, "grad_norm": 0.298828125, "learning_rate": 4.877484263453069e-05, "loss": 2.2764, "step": 4087 }, { "epoch": 0.55, "grad_norm": 0.2734375, "learning_rate": 4.8774188064453866e-05, "loss": 2.2431, "step": 4088 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.8773533323958376e-05, "loss": 2.264, "step": 4089 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.877287841304892e-05, "loss": 2.2754, "step": 4090 }, { "epoch": 0.55, "grad_norm": 0.318359375, "learning_rate": 4.877222333173018e-05, "loss": 2.245, "step": 4091 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.8771568080006854e-05, "loss": 2.2656, "step": 4092 }, { "epoch": 0.55, "grad_norm": 0.287109375, "learning_rate": 4.877091265788366e-05, "loss": 2.2711, "step": 4093 }, { "epoch": 0.55, "grad_norm": 0.302734375, "learning_rate": 4.877025706536527e-05, "loss": 2.2528, "step": 4094 }, { "epoch": 0.55, "grad_norm": 0.287109375, "learning_rate": 4.8769601302456395e-05, "loss": 2.2738, "step": 4095 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.876894536916172e-05, "loss": 2.2501, "step": 4096 }, { "epoch": 0.55, "grad_norm": 0.306640625, "learning_rate": 4.876828926548598e-05, "loss": 2.2632, "step": 4097 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.8767632991433855e-05, "loss": 2.2262, "step": 4098 }, { "epoch": 0.55, "grad_norm": 0.296875, "learning_rate": 4.8766976547010046e-05, "loss": 2.2637, "step": 4099 }, { "epoch": 0.55, "grad_norm": 0.2734375, "learning_rate": 4.8766319932219274e-05, "loss": 2.2567, "step": 4100 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.876566314706623e-05, "loss": 2.2503, "step": 4101 }, { "epoch": 0.55, "grad_norm": 0.3046875, "learning_rate": 4.876500619155564e-05, "loss": 2.2891, "step": 4102 }, { "epoch": 0.55, "grad_norm": 0.291015625, "learning_rate": 4.876434906569219e-05, "loss": 2.2867, "step": 4103 }, { "epoch": 0.55, "grad_norm": 0.296875, "learning_rate": 4.876369176948062e-05, "loss": 2.2617, "step": 4104 }, { "epoch": 0.55, "grad_norm": 0.306640625, "learning_rate": 4.876303430292561e-05, "loss": 2.2645, "step": 4105 }, { "epoch": 0.55, "grad_norm": 0.296875, "learning_rate": 4.876237666603189e-05, "loss": 2.2872, "step": 4106 }, { "epoch": 0.55, "grad_norm": 0.291015625, "learning_rate": 4.8761718858804185e-05, "loss": 2.2516, "step": 4107 }, { "epoch": 0.55, "grad_norm": 0.287109375, "learning_rate": 4.876106088124719e-05, "loss": 2.2644, "step": 4108 }, { "epoch": 0.55, "grad_norm": 0.310546875, "learning_rate": 4.876040273336562e-05, "loss": 2.2671, "step": 4109 }, { "epoch": 0.55, "grad_norm": 0.28515625, "learning_rate": 4.875974441516421e-05, "loss": 2.2773, "step": 4110 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.8759085926647666e-05, "loss": 2.2551, "step": 4111 }, { "epoch": 0.55, "grad_norm": 0.3046875, "learning_rate": 4.8758427267820716e-05, "loss": 2.2994, "step": 4112 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.8757768438688076e-05, "loss": 2.279, "step": 4113 }, { "epoch": 0.55, "grad_norm": 0.30078125, "learning_rate": 4.8757109439254474e-05, "loss": 2.268, "step": 4114 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.875645026952462e-05, "loss": 2.2785, "step": 4115 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.8755790929503264e-05, "loss": 2.2768, "step": 4116 }, { "epoch": 0.55, "grad_norm": 0.30078125, "learning_rate": 4.875513141919511e-05, "loss": 2.2572, "step": 4117 }, { "epoch": 0.55, "grad_norm": 0.298828125, "learning_rate": 4.875447173860489e-05, "loss": 2.3013, "step": 4118 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.875381188773734e-05, "loss": 2.2884, "step": 4119 }, { "epoch": 0.55, "grad_norm": 0.291015625, "learning_rate": 4.875315186659719e-05, "loss": 2.3076, "step": 4120 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.875249167518917e-05, "loss": 2.2619, "step": 4121 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.8751831313518005e-05, "loss": 2.2955, "step": 4122 }, { "epoch": 0.55, "grad_norm": 0.298828125, "learning_rate": 4.875117078158843e-05, "loss": 2.2615, "step": 4123 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.8750510079405186e-05, "loss": 2.2837, "step": 4124 }, { "epoch": 0.55, "grad_norm": 0.306640625, "learning_rate": 4.874984920697301e-05, "loss": 2.2635, "step": 4125 }, { "epoch": 0.55, "grad_norm": 0.28125, "learning_rate": 4.874918816429663e-05, "loss": 2.261, "step": 4126 }, { "epoch": 0.55, "grad_norm": 0.296875, "learning_rate": 4.874852695138079e-05, "loss": 2.2767, "step": 4127 }, { "epoch": 0.55, "grad_norm": 0.283203125, "learning_rate": 4.8747865568230235e-05, "loss": 2.2951, "step": 4128 }, { "epoch": 0.55, "grad_norm": 0.298828125, "learning_rate": 4.874720401484969e-05, "loss": 2.2503, "step": 4129 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.874654229124391e-05, "loss": 2.2664, "step": 4130 }, { "epoch": 0.55, "grad_norm": 0.3203125, "learning_rate": 4.874588039741764e-05, "loss": 2.2387, "step": 4131 }, { "epoch": 0.55, "grad_norm": 0.306640625, "learning_rate": 4.874521833337563e-05, "loss": 2.2442, "step": 4132 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.874455609912261e-05, "loss": 2.2872, "step": 4133 }, { "epoch": 0.55, "grad_norm": 0.28515625, "learning_rate": 4.874389369466333e-05, "loss": 2.2754, "step": 4134 }, { "epoch": 0.55, "grad_norm": 0.3046875, "learning_rate": 4.874323112000254e-05, "loss": 2.291, "step": 4135 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.8742568375145e-05, "loss": 2.2353, "step": 4136 }, { "epoch": 0.55, "grad_norm": 0.306640625, "learning_rate": 4.8741905460095444e-05, "loss": 2.3095, "step": 4137 }, { "epoch": 0.55, "grad_norm": 0.29296875, "learning_rate": 4.8741242374858634e-05, "loss": 2.269, "step": 4138 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.8740579119439325e-05, "loss": 2.2568, "step": 4139 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.8739915693842267e-05, "loss": 2.2859, "step": 4140 }, { "epoch": 0.55, "grad_norm": 0.283203125, "learning_rate": 4.873925209807221e-05, "loss": 2.2882, "step": 4141 }, { "epoch": 0.55, "grad_norm": 0.28125, "learning_rate": 4.873858833213392e-05, "loss": 2.2615, "step": 4142 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.873792439603215e-05, "loss": 2.2749, "step": 4143 }, { "epoch": 0.55, "grad_norm": 0.287109375, "learning_rate": 4.873726028977167e-05, "loss": 2.288, "step": 4144 }, { "epoch": 0.55, "grad_norm": 0.28125, "learning_rate": 4.873659601335723e-05, "loss": 2.2612, "step": 4145 }, { "epoch": 0.55, "grad_norm": 0.30078125, "learning_rate": 4.873593156679358e-05, "loss": 2.2844, "step": 4146 }, { "epoch": 0.55, "grad_norm": 0.294921875, "learning_rate": 4.873526695008551e-05, "loss": 2.2537, "step": 4147 }, { "epoch": 0.55, "grad_norm": 0.291015625, "learning_rate": 4.873460216323776e-05, "loss": 2.2713, "step": 4148 }, { "epoch": 0.55, "grad_norm": 0.28125, "learning_rate": 4.873393720625512e-05, "loss": 2.2569, "step": 4149 }, { "epoch": 0.55, "grad_norm": 0.279296875, "learning_rate": 4.873327207914233e-05, "loss": 2.2646, "step": 4150 }, { "epoch": 0.55, "grad_norm": 0.302734375, "learning_rate": 4.8732606781904175e-05, "loss": 2.2713, "step": 4151 }, { "epoch": 0.55, "grad_norm": 0.296875, "learning_rate": 4.873194131454541e-05, "loss": 2.2472, "step": 4152 }, { "epoch": 0.55, "grad_norm": 0.314453125, "learning_rate": 4.873127567707083e-05, "loss": 2.2731, "step": 4153 }, { "epoch": 0.55, "grad_norm": 0.283203125, "learning_rate": 4.873060986948518e-05, "loss": 2.2403, "step": 4154 }, { "epoch": 0.55, "grad_norm": 0.291015625, "learning_rate": 4.8729943891793245e-05, "loss": 2.2935, "step": 4155 }, { "epoch": 0.55, "grad_norm": 0.31640625, "learning_rate": 4.8729277743999804e-05, "loss": 2.251, "step": 4156 }, { "epoch": 0.55, "grad_norm": 0.330078125, "learning_rate": 4.872861142610962e-05, "loss": 2.2665, "step": 4157 }, { "epoch": 0.55, "grad_norm": 0.28125, "learning_rate": 4.872794493812747e-05, "loss": 2.2578, "step": 4158 }, { "epoch": 0.55, "grad_norm": 0.2890625, "learning_rate": 4.872727828005814e-05, "loss": 2.2577, "step": 4159 }, { "epoch": 0.55, "grad_norm": 0.298828125, "learning_rate": 4.87266114519064e-05, "loss": 2.246, "step": 4160 }, { "epoch": 0.56, "grad_norm": 0.291015625, "learning_rate": 4.872594445367704e-05, "loss": 2.3155, "step": 4161 }, { "epoch": 0.56, "grad_norm": 0.326171875, "learning_rate": 4.8725277285374835e-05, "loss": 2.2917, "step": 4162 }, { "epoch": 0.56, "grad_norm": 0.294921875, "learning_rate": 4.8724609947004576e-05, "loss": 2.2779, "step": 4163 }, { "epoch": 0.56, "grad_norm": 0.302734375, "learning_rate": 4.872394243857104e-05, "loss": 2.2619, "step": 4164 }, { "epoch": 0.56, "grad_norm": 0.2890625, "learning_rate": 4.8723274760078995e-05, "loss": 2.293, "step": 4165 }, { "epoch": 0.56, "grad_norm": 0.287109375, "learning_rate": 4.8722606911533256e-05, "loss": 2.2761, "step": 4166 }, { "epoch": 0.56, "grad_norm": 0.279296875, "learning_rate": 4.872193889293859e-05, "loss": 2.2685, "step": 4167 }, { "epoch": 0.56, "grad_norm": 0.29296875, "learning_rate": 4.872127070429979e-05, "loss": 2.2991, "step": 4168 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.872060234562166e-05, "loss": 2.259, "step": 4169 }, { "epoch": 0.56, "grad_norm": 0.30078125, "learning_rate": 4.871993381690897e-05, "loss": 2.2694, "step": 4170 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.871926511816653e-05, "loss": 2.2615, "step": 4171 }, { "epoch": 0.56, "grad_norm": 0.3125, "learning_rate": 4.871859624939912e-05, "loss": 2.2785, "step": 4172 }, { "epoch": 0.56, "grad_norm": 0.283203125, "learning_rate": 4.8717927210611534e-05, "loss": 2.2913, "step": 4173 }, { "epoch": 0.56, "grad_norm": 0.28515625, "learning_rate": 4.8717258001808585e-05, "loss": 2.2672, "step": 4174 }, { "epoch": 0.56, "grad_norm": 0.314453125, "learning_rate": 4.871658862299505e-05, "loss": 2.3142, "step": 4175 }, { "epoch": 0.56, "grad_norm": 0.30078125, "learning_rate": 4.871591907417574e-05, "loss": 2.2478, "step": 4176 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.871524935535545e-05, "loss": 2.2918, "step": 4177 }, { "epoch": 0.56, "grad_norm": 0.318359375, "learning_rate": 4.8714579466538976e-05, "loss": 2.2566, "step": 4178 }, { "epoch": 0.56, "grad_norm": 0.31640625, "learning_rate": 4.871390940773113e-05, "loss": 2.302, "step": 4179 }, { "epoch": 0.56, "grad_norm": 0.275390625, "learning_rate": 4.8713239178936706e-05, "loss": 2.2726, "step": 4180 }, { "epoch": 0.56, "grad_norm": 0.31640625, "learning_rate": 4.871256878016052e-05, "loss": 2.2761, "step": 4181 }, { "epoch": 0.56, "grad_norm": 0.29296875, "learning_rate": 4.871189821140737e-05, "loss": 2.273, "step": 4182 }, { "epoch": 0.56, "grad_norm": 0.296875, "learning_rate": 4.871122747268205e-05, "loss": 2.2817, "step": 4183 }, { "epoch": 0.56, "grad_norm": 0.291015625, "learning_rate": 4.8710556563989394e-05, "loss": 2.2279, "step": 4184 }, { "epoch": 0.56, "grad_norm": 0.279296875, "learning_rate": 4.87098854853342e-05, "loss": 2.2774, "step": 4185 }, { "epoch": 0.56, "grad_norm": 0.302734375, "learning_rate": 4.8709214236721256e-05, "loss": 2.2371, "step": 4186 }, { "epoch": 0.56, "grad_norm": 0.306640625, "learning_rate": 4.870854281815541e-05, "loss": 2.277, "step": 4187 }, { "epoch": 0.56, "grad_norm": 0.296875, "learning_rate": 4.8707871229641456e-05, "loss": 2.2685, "step": 4188 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.8707199471184206e-05, "loss": 2.2592, "step": 4189 }, { "epoch": 0.56, "grad_norm": 0.283203125, "learning_rate": 4.870652754278849e-05, "loss": 2.264, "step": 4190 }, { "epoch": 0.56, "grad_norm": 0.2890625, "learning_rate": 4.870585544445911e-05, "loss": 2.2938, "step": 4191 }, { "epoch": 0.56, "grad_norm": 0.310546875, "learning_rate": 4.870518317620089e-05, "loss": 2.2794, "step": 4192 }, { "epoch": 0.56, "grad_norm": 0.31640625, "learning_rate": 4.8704510738018636e-05, "loss": 2.2403, "step": 4193 }, { "epoch": 0.56, "grad_norm": 0.283203125, "learning_rate": 4.87038381299172e-05, "loss": 2.2871, "step": 4194 }, { "epoch": 0.56, "grad_norm": 0.30078125, "learning_rate": 4.870316535190137e-05, "loss": 2.2928, "step": 4195 }, { "epoch": 0.56, "grad_norm": 0.2890625, "learning_rate": 4.870249240397598e-05, "loss": 2.2442, "step": 4196 }, { "epoch": 0.56, "grad_norm": 0.29296875, "learning_rate": 4.870181928614586e-05, "loss": 2.2545, "step": 4197 }, { "epoch": 0.56, "grad_norm": 0.310546875, "learning_rate": 4.870114599841583e-05, "loss": 2.2775, "step": 4198 }, { "epoch": 0.56, "grad_norm": 0.287109375, "learning_rate": 4.870047254079072e-05, "loss": 2.2442, "step": 4199 }, { "epoch": 0.56, "grad_norm": 0.296875, "learning_rate": 4.8699798913275354e-05, "loss": 2.2694, "step": 4200 }, { "epoch": 0.56, "grad_norm": 0.310546875, "learning_rate": 4.869912511587455e-05, "loss": 2.2488, "step": 4201 }, { "epoch": 0.56, "grad_norm": 0.283203125, "learning_rate": 4.869845114859316e-05, "loss": 2.2852, "step": 4202 }, { "epoch": 0.56, "grad_norm": 0.296875, "learning_rate": 4.8697777011436006e-05, "loss": 2.2433, "step": 4203 }, { "epoch": 0.56, "grad_norm": 0.3203125, "learning_rate": 4.869710270440792e-05, "loss": 2.2832, "step": 4204 }, { "epoch": 0.56, "grad_norm": 0.291015625, "learning_rate": 4.869642822751373e-05, "loss": 2.2623, "step": 4205 }, { "epoch": 0.56, "grad_norm": 0.2734375, "learning_rate": 4.869575358075826e-05, "loss": 2.2634, "step": 4206 }, { "epoch": 0.56, "grad_norm": 0.302734375, "learning_rate": 4.8695078764146376e-05, "loss": 2.2799, "step": 4207 }, { "epoch": 0.56, "grad_norm": 0.306640625, "learning_rate": 4.8694403777682905e-05, "loss": 2.2687, "step": 4208 }, { "epoch": 0.56, "grad_norm": 0.318359375, "learning_rate": 4.869372862137267e-05, "loss": 2.2339, "step": 4209 }, { "epoch": 0.56, "grad_norm": 0.30078125, "learning_rate": 4.869305329522053e-05, "loss": 2.2843, "step": 4210 }, { "epoch": 0.56, "grad_norm": 0.291015625, "learning_rate": 4.869237779923131e-05, "loss": 2.2839, "step": 4211 }, { "epoch": 0.56, "grad_norm": 0.294921875, "learning_rate": 4.869170213340987e-05, "loss": 2.2591, "step": 4212 }, { "epoch": 0.56, "grad_norm": 0.294921875, "learning_rate": 4.8691026297761025e-05, "loss": 2.302, "step": 4213 }, { "epoch": 0.56, "grad_norm": 0.306640625, "learning_rate": 4.869035029228965e-05, "loss": 2.2741, "step": 4214 }, { "epoch": 0.56, "grad_norm": 0.287109375, "learning_rate": 4.868967411700057e-05, "loss": 2.2676, "step": 4215 }, { "epoch": 0.56, "grad_norm": 0.30859375, "learning_rate": 4.868899777189865e-05, "loss": 2.2913, "step": 4216 }, { "epoch": 0.56, "grad_norm": 0.310546875, "learning_rate": 4.868832125698872e-05, "loss": 2.2188, "step": 4217 }, { "epoch": 0.56, "grad_norm": 0.287109375, "learning_rate": 4.868764457227565e-05, "loss": 2.2744, "step": 4218 }, { "epoch": 0.56, "grad_norm": 0.29296875, "learning_rate": 4.868696771776427e-05, "loss": 2.2748, "step": 4219 }, { "epoch": 0.56, "grad_norm": 0.30078125, "learning_rate": 4.868629069345944e-05, "loss": 2.2386, "step": 4220 }, { "epoch": 0.56, "grad_norm": 0.28125, "learning_rate": 4.8685613499366e-05, "loss": 2.2588, "step": 4221 }, { "epoch": 0.56, "grad_norm": 0.31640625, "learning_rate": 4.868493613548883e-05, "loss": 2.2726, "step": 4222 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.8684258601832775e-05, "loss": 2.278, "step": 4223 }, { "epoch": 0.56, "grad_norm": 0.298828125, "learning_rate": 4.8683580898402685e-05, "loss": 2.285, "step": 4224 }, { "epoch": 0.56, "grad_norm": 0.306640625, "learning_rate": 4.868290302520343e-05, "loss": 2.282, "step": 4225 }, { "epoch": 0.56, "grad_norm": 0.29296875, "learning_rate": 4.868222498223985e-05, "loss": 2.2332, "step": 4226 }, { "epoch": 0.56, "grad_norm": 0.32421875, "learning_rate": 4.8681546769516826e-05, "loss": 2.2835, "step": 4227 }, { "epoch": 0.56, "grad_norm": 0.3125, "learning_rate": 4.8680868387039206e-05, "loss": 2.3008, "step": 4228 }, { "epoch": 0.56, "grad_norm": 0.3125, "learning_rate": 4.868018983481186e-05, "loss": 2.2725, "step": 4229 }, { "epoch": 0.56, "grad_norm": 0.306640625, "learning_rate": 4.8679511112839646e-05, "loss": 2.2538, "step": 4230 }, { "epoch": 0.56, "grad_norm": 0.2890625, "learning_rate": 4.867883222112744e-05, "loss": 2.2574, "step": 4231 }, { "epoch": 0.56, "grad_norm": 0.330078125, "learning_rate": 4.8678153159680085e-05, "loss": 2.2708, "step": 4232 }, { "epoch": 0.56, "grad_norm": 0.3125, "learning_rate": 4.8677473928502476e-05, "loss": 2.2565, "step": 4233 }, { "epoch": 0.56, "grad_norm": 0.3125, "learning_rate": 4.867679452759947e-05, "loss": 2.2824, "step": 4234 }, { "epoch": 0.56, "grad_norm": 0.2890625, "learning_rate": 4.867611495697594e-05, "loss": 2.2914, "step": 4235 }, { "epoch": 0.57, "grad_norm": 0.287109375, "learning_rate": 4.867543521663675e-05, "loss": 2.2725, "step": 4236 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.867475530658678e-05, "loss": 2.2961, "step": 4237 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.8674075226830904e-05, "loss": 2.2688, "step": 4238 }, { "epoch": 0.57, "grad_norm": 0.291015625, "learning_rate": 4.867339497737399e-05, "loss": 2.2526, "step": 4239 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.867271455822092e-05, "loss": 2.2753, "step": 4240 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.867203396937657e-05, "loss": 2.2845, "step": 4241 }, { "epoch": 0.57, "grad_norm": 0.30859375, "learning_rate": 4.8671353210845814e-05, "loss": 2.2572, "step": 4242 }, { "epoch": 0.57, "grad_norm": 0.3046875, "learning_rate": 4.867067228263355e-05, "loss": 2.2797, "step": 4243 }, { "epoch": 0.57, "grad_norm": 0.291015625, "learning_rate": 4.866999118474462e-05, "loss": 2.2752, "step": 4244 }, { "epoch": 0.57, "grad_norm": 0.30078125, "learning_rate": 4.8669309917183955e-05, "loss": 2.2664, "step": 4245 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.86686284799564e-05, "loss": 2.2677, "step": 4246 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.8667946873066865e-05, "loss": 2.2663, "step": 4247 }, { "epoch": 0.57, "grad_norm": 0.30078125, "learning_rate": 4.866726509652022e-05, "loss": 2.2647, "step": 4248 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.8666583150321364e-05, "loss": 2.2865, "step": 4249 }, { "epoch": 0.57, "grad_norm": 0.271484375, "learning_rate": 4.8665901034475177e-05, "loss": 2.2603, "step": 4250 }, { "epoch": 0.57, "grad_norm": 0.302734375, "learning_rate": 4.8665218748986545e-05, "loss": 2.2883, "step": 4251 }, { "epoch": 0.57, "grad_norm": 0.287109375, "learning_rate": 4.8664536293860375e-05, "loss": 2.2897, "step": 4252 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.866385366910153e-05, "loss": 2.2514, "step": 4253 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.8663170874714936e-05, "loss": 2.2408, "step": 4254 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.866248791070547e-05, "loss": 2.2609, "step": 4255 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.8661804777078034e-05, "loss": 2.2748, "step": 4256 }, { "epoch": 0.57, "grad_norm": 0.2890625, "learning_rate": 4.866112147383752e-05, "loss": 2.2764, "step": 4257 }, { "epoch": 0.57, "grad_norm": 0.28515625, "learning_rate": 4.866043800098883e-05, "loss": 2.2748, "step": 4258 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.8659754358536855e-05, "loss": 2.2622, "step": 4259 }, { "epoch": 0.57, "grad_norm": 0.2890625, "learning_rate": 4.86590705464865e-05, "loss": 2.2668, "step": 4260 }, { "epoch": 0.57, "grad_norm": 0.31640625, "learning_rate": 4.865838656484267e-05, "loss": 2.2535, "step": 4261 }, { "epoch": 0.57, "grad_norm": 0.283203125, "learning_rate": 4.865770241361027e-05, "loss": 2.2438, "step": 4262 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.86570180927942e-05, "loss": 2.284, "step": 4263 }, { "epoch": 0.57, "grad_norm": 0.287109375, "learning_rate": 4.865633360239936e-05, "loss": 2.2694, "step": 4264 }, { "epoch": 0.57, "grad_norm": 0.30859375, "learning_rate": 4.865564894243066e-05, "loss": 2.3002, "step": 4265 }, { "epoch": 0.57, "grad_norm": 0.28515625, "learning_rate": 4.865496411289302e-05, "loss": 2.291, "step": 4266 }, { "epoch": 0.57, "grad_norm": 0.291015625, "learning_rate": 4.8654279113791336e-05, "loss": 2.2701, "step": 4267 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.8653593945130515e-05, "loss": 2.2703, "step": 4268 }, { "epoch": 0.57, "grad_norm": 0.3046875, "learning_rate": 4.865290860691548e-05, "loss": 2.2897, "step": 4269 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.865222309915114e-05, "loss": 2.258, "step": 4270 }, { "epoch": 0.57, "grad_norm": 0.29296875, "learning_rate": 4.8651537421842406e-05, "loss": 2.3027, "step": 4271 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.865085157499419e-05, "loss": 2.2598, "step": 4272 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.865016555861141e-05, "loss": 2.2788, "step": 4273 }, { "epoch": 0.57, "grad_norm": 0.29296875, "learning_rate": 4.8649479372698994e-05, "loss": 2.2565, "step": 4274 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.864879301726185e-05, "loss": 2.2911, "step": 4275 }, { "epoch": 0.57, "grad_norm": 0.26953125, "learning_rate": 4.86481064923049e-05, "loss": 2.2534, "step": 4276 }, { "epoch": 0.57, "grad_norm": 0.28515625, "learning_rate": 4.864741979783307e-05, "loss": 2.2908, "step": 4277 }, { "epoch": 0.57, "grad_norm": 0.2890625, "learning_rate": 4.8646732933851274e-05, "loss": 2.2288, "step": 4278 }, { "epoch": 0.57, "grad_norm": 0.3046875, "learning_rate": 4.864604590036444e-05, "loss": 2.2567, "step": 4279 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.86453586973775e-05, "loss": 2.2219, "step": 4280 }, { "epoch": 0.57, "grad_norm": 0.28515625, "learning_rate": 4.864467132489536e-05, "loss": 2.2706, "step": 4281 }, { "epoch": 0.57, "grad_norm": 0.2890625, "learning_rate": 4.8643983782922966e-05, "loss": 2.2613, "step": 4282 }, { "epoch": 0.57, "grad_norm": 0.283203125, "learning_rate": 4.864329607146524e-05, "loss": 2.2548, "step": 4283 }, { "epoch": 0.57, "grad_norm": 0.322265625, "learning_rate": 4.8642608190527115e-05, "loss": 2.2859, "step": 4284 }, { "epoch": 0.57, "grad_norm": 0.28125, "learning_rate": 4.8641920140113514e-05, "loss": 2.265, "step": 4285 }, { "epoch": 0.57, "grad_norm": 0.27734375, "learning_rate": 4.8641231920229377e-05, "loss": 2.2486, "step": 4286 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.8640543530879635e-05, "loss": 2.2773, "step": 4287 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.863985497206922e-05, "loss": 2.2612, "step": 4288 }, { "epoch": 0.57, "grad_norm": 0.30078125, "learning_rate": 4.863916624380307e-05, "loss": 2.3001, "step": 4289 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.863847734608612e-05, "loss": 2.2777, "step": 4290 }, { "epoch": 0.57, "grad_norm": 0.3125, "learning_rate": 4.863778827892331e-05, "loss": 2.2435, "step": 4291 }, { "epoch": 0.57, "grad_norm": 0.275390625, "learning_rate": 4.863709904231958e-05, "loss": 2.2743, "step": 4292 }, { "epoch": 0.57, "grad_norm": 0.287109375, "learning_rate": 4.863640963627988e-05, "loss": 2.2526, "step": 4293 }, { "epoch": 0.57, "grad_norm": 0.27734375, "learning_rate": 4.8635720060809126e-05, "loss": 2.2453, "step": 4294 }, { "epoch": 0.57, "grad_norm": 0.31640625, "learning_rate": 4.863503031591228e-05, "loss": 2.2957, "step": 4295 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.863434040159429e-05, "loss": 2.2891, "step": 4296 }, { "epoch": 0.57, "grad_norm": 0.306640625, "learning_rate": 4.863365031786009e-05, "loss": 2.2965, "step": 4297 }, { "epoch": 0.57, "grad_norm": 0.306640625, "learning_rate": 4.863296006471463e-05, "loss": 2.2746, "step": 4298 }, { "epoch": 0.57, "grad_norm": 0.302734375, "learning_rate": 4.863226964216286e-05, "loss": 2.2684, "step": 4299 }, { "epoch": 0.57, "grad_norm": 0.294921875, "learning_rate": 4.863157905020973e-05, "loss": 2.2768, "step": 4300 }, { "epoch": 0.57, "grad_norm": 0.2890625, "learning_rate": 4.863088828886018e-05, "loss": 2.2878, "step": 4301 }, { "epoch": 0.57, "grad_norm": 0.298828125, "learning_rate": 4.863019735811918e-05, "loss": 2.2579, "step": 4302 }, { "epoch": 0.57, "grad_norm": 0.30859375, "learning_rate": 4.862950625799166e-05, "loss": 2.263, "step": 4303 }, { "epoch": 0.57, "grad_norm": 0.28515625, "learning_rate": 4.8628814988482594e-05, "loss": 2.2671, "step": 4304 }, { "epoch": 0.57, "grad_norm": 0.287109375, "learning_rate": 4.862812354959694e-05, "loss": 2.2547, "step": 4305 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.862743194133963e-05, "loss": 2.2973, "step": 4306 }, { "epoch": 0.57, "grad_norm": 0.31640625, "learning_rate": 4.862674016371564e-05, "loss": 2.2514, "step": 4307 }, { "epoch": 0.57, "grad_norm": 0.283203125, "learning_rate": 4.8626048216729923e-05, "loss": 2.2952, "step": 4308 }, { "epoch": 0.57, "grad_norm": 0.291015625, "learning_rate": 4.862535610038744e-05, "loss": 2.2903, "step": 4309 }, { "epoch": 0.57, "grad_norm": 0.296875, "learning_rate": 4.8624663814693156e-05, "loss": 2.2816, "step": 4310 }, { "epoch": 0.58, "grad_norm": 0.294921875, "learning_rate": 4.8623971359652034e-05, "loss": 2.266, "step": 4311 }, { "epoch": 0.58, "grad_norm": 0.271484375, "learning_rate": 4.862327873526902e-05, "loss": 2.27, "step": 4312 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.86225859415491e-05, "loss": 2.2409, "step": 4313 }, { "epoch": 0.58, "grad_norm": 0.275390625, "learning_rate": 4.862189297849724e-05, "loss": 2.2585, "step": 4314 }, { "epoch": 0.58, "grad_norm": 0.296875, "learning_rate": 4.8621199846118394e-05, "loss": 2.2905, "step": 4315 }, { "epoch": 0.58, "grad_norm": 0.298828125, "learning_rate": 4.8620506544417535e-05, "loss": 2.2594, "step": 4316 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.861981307339964e-05, "loss": 2.2705, "step": 4317 }, { "epoch": 0.58, "grad_norm": 0.2890625, "learning_rate": 4.861911943306967e-05, "loss": 2.2814, "step": 4318 }, { "epoch": 0.58, "grad_norm": 0.291015625, "learning_rate": 4.8618425623432604e-05, "loss": 2.254, "step": 4319 }, { "epoch": 0.58, "grad_norm": 0.287109375, "learning_rate": 4.861773164449341e-05, "loss": 2.3047, "step": 4320 }, { "epoch": 0.58, "grad_norm": 0.291015625, "learning_rate": 4.861703749625707e-05, "loss": 2.269, "step": 4321 }, { "epoch": 0.58, "grad_norm": 0.28515625, "learning_rate": 4.8616343178728555e-05, "loss": 2.2386, "step": 4322 }, { "epoch": 0.58, "grad_norm": 0.271484375, "learning_rate": 4.8615648691912836e-05, "loss": 2.2842, "step": 4323 }, { "epoch": 0.58, "grad_norm": 0.287109375, "learning_rate": 4.86149540358149e-05, "loss": 2.2605, "step": 4324 }, { "epoch": 0.58, "grad_norm": 0.298828125, "learning_rate": 4.8614259210439726e-05, "loss": 2.288, "step": 4325 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.861356421579229e-05, "loss": 2.2679, "step": 4326 }, { "epoch": 0.58, "grad_norm": 0.29296875, "learning_rate": 4.861286905187758e-05, "loss": 2.2822, "step": 4327 }, { "epoch": 0.58, "grad_norm": 0.283203125, "learning_rate": 4.861217371870057e-05, "loss": 2.2601, "step": 4328 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.861147821626626e-05, "loss": 2.2741, "step": 4329 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.861078254457962e-05, "loss": 2.2563, "step": 4330 }, { "epoch": 0.58, "grad_norm": 1.7265625, "learning_rate": 4.8610086703645644e-05, "loss": 2.2786, "step": 4331 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.860939069346931e-05, "loss": 2.2645, "step": 4332 }, { "epoch": 0.58, "grad_norm": 0.3515625, "learning_rate": 4.860869451405563e-05, "loss": 2.2475, "step": 4333 }, { "epoch": 0.58, "grad_norm": 0.3125, "learning_rate": 4.8607998165409564e-05, "loss": 2.2498, "step": 4334 }, { "epoch": 0.58, "grad_norm": 0.28515625, "learning_rate": 4.860730164753613e-05, "loss": 2.2342, "step": 4335 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.8606604960440305e-05, "loss": 2.2602, "step": 4336 }, { "epoch": 0.58, "grad_norm": 0.30859375, "learning_rate": 4.860590810412709e-05, "loss": 2.272, "step": 4337 }, { "epoch": 0.58, "grad_norm": 0.3203125, "learning_rate": 4.8605211078601476e-05, "loss": 2.254, "step": 4338 }, { "epoch": 0.58, "grad_norm": 0.314453125, "learning_rate": 4.8604513883868466e-05, "loss": 2.2871, "step": 4339 }, { "epoch": 0.58, "grad_norm": 0.330078125, "learning_rate": 4.8603816519933056e-05, "loss": 2.268, "step": 4340 }, { "epoch": 0.58, "grad_norm": 0.310546875, "learning_rate": 4.860311898680023e-05, "loss": 2.312, "step": 4341 }, { "epoch": 0.58, "grad_norm": 0.3125, "learning_rate": 4.860242128447501e-05, "loss": 2.2869, "step": 4342 }, { "epoch": 0.58, "grad_norm": 0.3125, "learning_rate": 4.8601723412962384e-05, "loss": 2.2718, "step": 4343 }, { "epoch": 0.58, "grad_norm": 0.318359375, "learning_rate": 4.860102537226736e-05, "loss": 2.2533, "step": 4344 }, { "epoch": 0.58, "grad_norm": 0.3203125, "learning_rate": 4.860032716239494e-05, "loss": 2.2846, "step": 4345 }, { "epoch": 0.58, "grad_norm": 0.3125, "learning_rate": 4.8599628783350135e-05, "loss": 2.246, "step": 4346 }, { "epoch": 0.58, "grad_norm": 0.30078125, "learning_rate": 4.8598930235137934e-05, "loss": 2.2692, "step": 4347 }, { "epoch": 0.58, "grad_norm": 0.302734375, "learning_rate": 4.859823151776336e-05, "loss": 2.2482, "step": 4348 }, { "epoch": 0.58, "grad_norm": 0.28125, "learning_rate": 4.859753263123141e-05, "loss": 2.2565, "step": 4349 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.859683357554711e-05, "loss": 2.2958, "step": 4350 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.859613435071546e-05, "loss": 2.2783, "step": 4351 }, { "epoch": 0.58, "grad_norm": 0.318359375, "learning_rate": 4.859543495674147e-05, "loss": 2.2667, "step": 4352 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.859473539363016e-05, "loss": 2.269, "step": 4353 }, { "epoch": 0.58, "grad_norm": 0.29296875, "learning_rate": 4.859403566138654e-05, "loss": 2.2747, "step": 4354 }, { "epoch": 0.58, "grad_norm": 0.3203125, "learning_rate": 4.859333576001564e-05, "loss": 2.2854, "step": 4355 }, { "epoch": 0.58, "grad_norm": 0.3125, "learning_rate": 4.859263568952245e-05, "loss": 2.2871, "step": 4356 }, { "epoch": 0.58, "grad_norm": 0.294921875, "learning_rate": 4.8591935449912e-05, "loss": 2.2968, "step": 4357 }, { "epoch": 0.58, "grad_norm": 0.29296875, "learning_rate": 4.859123504118932e-05, "loss": 2.2587, "step": 4358 }, { "epoch": 0.58, "grad_norm": 0.30859375, "learning_rate": 4.859053446335942e-05, "loss": 2.2839, "step": 4359 }, { "epoch": 0.58, "grad_norm": 0.3046875, "learning_rate": 4.858983371642732e-05, "loss": 2.2867, "step": 4360 }, { "epoch": 0.58, "grad_norm": 0.283203125, "learning_rate": 4.8589132800398064e-05, "loss": 2.2742, "step": 4361 }, { "epoch": 0.58, "grad_norm": 0.310546875, "learning_rate": 4.858843171527665e-05, "loss": 2.2713, "step": 4362 }, { "epoch": 0.58, "grad_norm": 0.30078125, "learning_rate": 4.858773046106811e-05, "loss": 2.2777, "step": 4363 }, { "epoch": 0.58, "grad_norm": 0.298828125, "learning_rate": 4.858702903777749e-05, "loss": 2.292, "step": 4364 }, { "epoch": 0.58, "grad_norm": 0.28125, "learning_rate": 4.858632744540979e-05, "loss": 2.2589, "step": 4365 }, { "epoch": 0.58, "grad_norm": 0.31640625, "learning_rate": 4.858562568397005e-05, "loss": 2.2666, "step": 4366 }, { "epoch": 0.58, "grad_norm": 0.291015625, "learning_rate": 4.858492375346331e-05, "loss": 2.3022, "step": 4367 }, { "epoch": 0.58, "grad_norm": 0.30859375, "learning_rate": 4.858422165389459e-05, "loss": 2.2692, "step": 4368 }, { "epoch": 0.58, "grad_norm": 0.30078125, "learning_rate": 4.858351938526893e-05, "loss": 2.2766, "step": 4369 }, { "epoch": 0.58, "grad_norm": 0.302734375, "learning_rate": 4.8582816947591346e-05, "loss": 2.2625, "step": 4370 }, { "epoch": 0.58, "grad_norm": 0.302734375, "learning_rate": 4.8582114340866904e-05, "loss": 2.2564, "step": 4371 }, { "epoch": 0.58, "grad_norm": 0.294921875, "learning_rate": 4.858141156510062e-05, "loss": 2.269, "step": 4372 }, { "epoch": 0.58, "grad_norm": 0.28515625, "learning_rate": 4.858070862029753e-05, "loss": 2.275, "step": 4373 }, { "epoch": 0.58, "grad_norm": 0.306640625, "learning_rate": 4.858000550646269e-05, "loss": 2.2757, "step": 4374 }, { "epoch": 0.58, "grad_norm": 0.30078125, "learning_rate": 4.857930222360112e-05, "loss": 2.3126, "step": 4375 }, { "epoch": 0.58, "grad_norm": 0.29296875, "learning_rate": 4.857859877171788e-05, "loss": 2.2717, "step": 4376 }, { "epoch": 0.58, "grad_norm": 0.298828125, "learning_rate": 4.857789515081799e-05, "loss": 2.2786, "step": 4377 }, { "epoch": 0.58, "grad_norm": 0.28125, "learning_rate": 4.857719136090652e-05, "loss": 2.2726, "step": 4378 }, { "epoch": 0.58, "grad_norm": 0.287109375, "learning_rate": 4.857648740198849e-05, "loss": 2.2722, "step": 4379 }, { "epoch": 0.58, "grad_norm": 0.28515625, "learning_rate": 4.8575783274068966e-05, "loss": 2.2436, "step": 4380 }, { "epoch": 0.58, "grad_norm": 0.294921875, "learning_rate": 4.857507897715298e-05, "loss": 2.2695, "step": 4381 }, { "epoch": 0.58, "grad_norm": 0.318359375, "learning_rate": 4.857437451124559e-05, "loss": 2.2649, "step": 4382 }, { "epoch": 0.58, "grad_norm": 0.28125, "learning_rate": 4.857366987635184e-05, "loss": 2.2674, "step": 4383 }, { "epoch": 0.58, "grad_norm": 0.28125, "learning_rate": 4.857296507247678e-05, "loss": 2.2423, "step": 4384 }, { "epoch": 0.58, "grad_norm": 0.298828125, "learning_rate": 4.857226009962548e-05, "loss": 2.2577, "step": 4385 }, { "epoch": 0.59, "grad_norm": 0.283203125, "learning_rate": 4.8571554957802964e-05, "loss": 2.2563, "step": 4386 }, { "epoch": 0.59, "grad_norm": 0.28125, "learning_rate": 4.8570849647014316e-05, "loss": 2.2874, "step": 4387 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.857014416726458e-05, "loss": 2.2587, "step": 4388 }, { "epoch": 0.59, "grad_norm": 0.30859375, "learning_rate": 4.8569438518558795e-05, "loss": 2.2667, "step": 4389 }, { "epoch": 0.59, "grad_norm": 0.306640625, "learning_rate": 4.8568732700902046e-05, "loss": 2.2988, "step": 4390 }, { "epoch": 0.59, "grad_norm": 0.306640625, "learning_rate": 4.8568026714299385e-05, "loss": 2.27, "step": 4391 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.856732055875586e-05, "loss": 2.2665, "step": 4392 }, { "epoch": 0.59, "grad_norm": 0.287109375, "learning_rate": 4.8566614234276555e-05, "loss": 2.2783, "step": 4393 }, { "epoch": 0.59, "grad_norm": 0.275390625, "learning_rate": 4.856590774086651e-05, "loss": 2.2723, "step": 4394 }, { "epoch": 0.59, "grad_norm": 0.33984375, "learning_rate": 4.85652010785308e-05, "loss": 2.2504, "step": 4395 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.8564494247274494e-05, "loss": 2.256, "step": 4396 }, { "epoch": 0.59, "grad_norm": 0.287109375, "learning_rate": 4.8563787247102654e-05, "loss": 2.2931, "step": 4397 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.856308007802035e-05, "loss": 2.2595, "step": 4398 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.8562372740032646e-05, "loss": 2.2725, "step": 4399 }, { "epoch": 0.59, "grad_norm": 0.28515625, "learning_rate": 4.856166523314462e-05, "loss": 2.2735, "step": 4400 }, { "epoch": 0.59, "grad_norm": 0.294921875, "learning_rate": 4.856095755736134e-05, "loss": 2.302, "step": 4401 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.8560249712687874e-05, "loss": 2.3051, "step": 4402 }, { "epoch": 0.59, "grad_norm": 0.296875, "learning_rate": 4.855954169912931e-05, "loss": 2.2582, "step": 4403 }, { "epoch": 0.59, "grad_norm": 0.294921875, "learning_rate": 4.855883351669071e-05, "loss": 2.2764, "step": 4404 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.855812516537715e-05, "loss": 2.285, "step": 4405 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.855741664519372e-05, "loss": 2.2532, "step": 4406 }, { "epoch": 0.59, "grad_norm": 0.310546875, "learning_rate": 4.855670795614548e-05, "loss": 2.2521, "step": 4407 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.855599909823754e-05, "loss": 2.2765, "step": 4408 }, { "epoch": 0.59, "grad_norm": 0.296875, "learning_rate": 4.855529007147495e-05, "loss": 2.2645, "step": 4409 }, { "epoch": 0.59, "grad_norm": 0.283203125, "learning_rate": 4.85545808758628e-05, "loss": 2.2895, "step": 4410 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.855387151140618e-05, "loss": 2.2491, "step": 4411 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.855316197811018e-05, "loss": 2.2644, "step": 4412 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.855245227597988e-05, "loss": 2.2673, "step": 4413 }, { "epoch": 0.59, "grad_norm": 0.279296875, "learning_rate": 4.855174240502035e-05, "loss": 2.2483, "step": 4414 }, { "epoch": 0.59, "grad_norm": 0.283203125, "learning_rate": 4.8551032365236715e-05, "loss": 2.2608, "step": 4415 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.855032215663403e-05, "loss": 2.2389, "step": 4416 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.8549611779217406e-05, "loss": 2.2614, "step": 4417 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.854890123299194e-05, "loss": 2.2541, "step": 4418 }, { "epoch": 0.59, "grad_norm": 0.294921875, "learning_rate": 4.85481905179627e-05, "loss": 2.2835, "step": 4419 }, { "epoch": 0.59, "grad_norm": 0.314453125, "learning_rate": 4.8547479634134806e-05, "loss": 2.2659, "step": 4420 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.854676858151333e-05, "loss": 2.2703, "step": 4421 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.85460573601034e-05, "loss": 2.2545, "step": 4422 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.8545345969910086e-05, "loss": 2.2937, "step": 4423 }, { "epoch": 0.59, "grad_norm": 0.306640625, "learning_rate": 4.8544634410938504e-05, "loss": 2.2682, "step": 4424 }, { "epoch": 0.59, "grad_norm": 0.283203125, "learning_rate": 4.8543922683193744e-05, "loss": 2.2495, "step": 4425 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.8543210786680915e-05, "loss": 2.2453, "step": 4426 }, { "epoch": 0.59, "grad_norm": 0.2890625, "learning_rate": 4.854249872140512e-05, "loss": 2.285, "step": 4427 }, { "epoch": 0.59, "grad_norm": 0.28515625, "learning_rate": 4.854178648737145e-05, "loss": 2.2648, "step": 4428 }, { "epoch": 0.59, "grad_norm": 0.3046875, "learning_rate": 4.854107408458504e-05, "loss": 2.2841, "step": 4429 }, { "epoch": 0.59, "grad_norm": 0.271484375, "learning_rate": 4.854036151305097e-05, "loss": 2.2963, "step": 4430 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.853964877277435e-05, "loss": 2.2452, "step": 4431 }, { "epoch": 0.59, "grad_norm": 0.287109375, "learning_rate": 4.8538935863760296e-05, "loss": 2.2578, "step": 4432 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.853822278601392e-05, "loss": 2.2593, "step": 4433 }, { "epoch": 0.59, "grad_norm": 0.283203125, "learning_rate": 4.853750953954033e-05, "loss": 2.2496, "step": 4434 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.853679612434464e-05, "loss": 2.2626, "step": 4435 }, { "epoch": 0.59, "grad_norm": 0.443359375, "learning_rate": 4.853608254043196e-05, "loss": 2.2417, "step": 4436 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.853536878780741e-05, "loss": 2.2629, "step": 4437 }, { "epoch": 0.59, "grad_norm": 0.291015625, "learning_rate": 4.853465486647611e-05, "loss": 2.2859, "step": 4438 }, { "epoch": 0.59, "grad_norm": 0.296875, "learning_rate": 4.853394077644317e-05, "loss": 2.2906, "step": 4439 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.85332265177137e-05, "loss": 2.2463, "step": 4440 }, { "epoch": 0.59, "grad_norm": 0.3125, "learning_rate": 4.853251209029285e-05, "loss": 2.2685, "step": 4441 }, { "epoch": 0.59, "grad_norm": 0.314453125, "learning_rate": 4.853179749418571e-05, "loss": 2.2791, "step": 4442 }, { "epoch": 0.59, "grad_norm": 0.3125, "learning_rate": 4.8531082729397416e-05, "loss": 2.3002, "step": 4443 }, { "epoch": 0.59, "grad_norm": 0.310546875, "learning_rate": 4.85303677959331e-05, "loss": 2.2624, "step": 4444 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.852965269379787e-05, "loss": 2.3048, "step": 4445 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.852893742299685e-05, "loss": 2.2519, "step": 4446 }, { "epoch": 0.59, "grad_norm": 0.271484375, "learning_rate": 4.8528221983535185e-05, "loss": 2.2706, "step": 4447 }, { "epoch": 0.59, "grad_norm": 0.29296875, "learning_rate": 4.8527506375417995e-05, "loss": 2.3001, "step": 4448 }, { "epoch": 0.59, "grad_norm": 0.333984375, "learning_rate": 4.8526790598650405e-05, "loss": 2.2832, "step": 4449 }, { "epoch": 0.59, "grad_norm": 0.322265625, "learning_rate": 4.8526074653237554e-05, "loss": 2.259, "step": 4450 }, { "epoch": 0.59, "grad_norm": 0.294921875, "learning_rate": 4.8525358539184575e-05, "loss": 2.2783, "step": 4451 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.852464225649659e-05, "loss": 2.2596, "step": 4452 }, { "epoch": 0.59, "grad_norm": 0.314453125, "learning_rate": 4.852392580517874e-05, "loss": 2.2695, "step": 4453 }, { "epoch": 0.59, "grad_norm": 0.318359375, "learning_rate": 4.852320918523616e-05, "loss": 2.281, "step": 4454 }, { "epoch": 0.59, "grad_norm": 0.310546875, "learning_rate": 4.852249239667399e-05, "loss": 2.2866, "step": 4455 }, { "epoch": 0.59, "grad_norm": 0.3046875, "learning_rate": 4.852177543949736e-05, "loss": 2.2753, "step": 4456 }, { "epoch": 0.59, "grad_norm": 0.279296875, "learning_rate": 4.852105831371142e-05, "loss": 2.2734, "step": 4457 }, { "epoch": 0.59, "grad_norm": 0.302734375, "learning_rate": 4.8520341019321305e-05, "loss": 2.2816, "step": 4458 }, { "epoch": 0.59, "grad_norm": 0.298828125, "learning_rate": 4.851962355633216e-05, "loss": 2.2681, "step": 4459 }, { "epoch": 0.59, "grad_norm": 0.287109375, "learning_rate": 4.851890592474912e-05, "loss": 2.2763, "step": 4460 }, { "epoch": 0.6, "grad_norm": 0.287109375, "learning_rate": 4.851818812457734e-05, "loss": 2.2761, "step": 4461 }, { "epoch": 0.6, "grad_norm": 0.306640625, "learning_rate": 4.851747015582196e-05, "loss": 2.2522, "step": 4462 }, { "epoch": 0.6, "grad_norm": 0.291015625, "learning_rate": 4.851675201848812e-05, "loss": 2.2797, "step": 4463 }, { "epoch": 0.6, "grad_norm": 0.3046875, "learning_rate": 4.851603371258098e-05, "loss": 2.2664, "step": 4464 }, { "epoch": 0.6, "grad_norm": 0.283203125, "learning_rate": 4.851531523810567e-05, "loss": 2.2667, "step": 4465 }, { "epoch": 0.6, "grad_norm": 0.310546875, "learning_rate": 4.851459659506737e-05, "loss": 2.2715, "step": 4466 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.851387778347121e-05, "loss": 2.2338, "step": 4467 }, { "epoch": 0.6, "grad_norm": 0.3125, "learning_rate": 4.851315880332235e-05, "loss": 2.2878, "step": 4468 }, { "epoch": 0.6, "grad_norm": 0.28515625, "learning_rate": 4.851243965462593e-05, "loss": 2.2741, "step": 4469 }, { "epoch": 0.6, "grad_norm": 0.3203125, "learning_rate": 4.851172033738712e-05, "loss": 2.2474, "step": 4470 }, { "epoch": 0.6, "grad_norm": 0.302734375, "learning_rate": 4.851100085161107e-05, "loss": 2.2643, "step": 4471 }, { "epoch": 0.6, "grad_norm": 0.291015625, "learning_rate": 4.8510281197302945e-05, "loss": 2.2658, "step": 4472 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.850956137446789e-05, "loss": 2.2741, "step": 4473 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.850884138311108e-05, "loss": 2.2422, "step": 4474 }, { "epoch": 0.6, "grad_norm": 0.29296875, "learning_rate": 4.8508121223237676e-05, "loss": 2.245, "step": 4475 }, { "epoch": 0.6, "grad_norm": 0.283203125, "learning_rate": 4.8507400894852825e-05, "loss": 2.3108, "step": 4476 }, { "epoch": 0.6, "grad_norm": 0.3046875, "learning_rate": 4.85066803979617e-05, "loss": 2.2588, "step": 4477 }, { "epoch": 0.6, "grad_norm": 0.302734375, "learning_rate": 4.850595973256946e-05, "loss": 2.2994, "step": 4478 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.8505238898681284e-05, "loss": 2.2638, "step": 4479 }, { "epoch": 0.6, "grad_norm": 0.318359375, "learning_rate": 4.850451789630233e-05, "loss": 2.3084, "step": 4480 }, { "epoch": 0.6, "grad_norm": 0.28515625, "learning_rate": 4.850379672543775e-05, "loss": 2.266, "step": 4481 }, { "epoch": 0.6, "grad_norm": 0.29296875, "learning_rate": 4.850307538609275e-05, "loss": 2.2957, "step": 4482 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.8502353878272474e-05, "loss": 2.2567, "step": 4483 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.8501632201982105e-05, "loss": 2.2219, "step": 4484 }, { "epoch": 0.6, "grad_norm": 0.306640625, "learning_rate": 4.8500910357226804e-05, "loss": 2.2372, "step": 4485 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.850018834401176e-05, "loss": 2.3156, "step": 4486 }, { "epoch": 0.6, "grad_norm": 0.3125, "learning_rate": 4.8499466162342144e-05, "loss": 2.2537, "step": 4487 }, { "epoch": 0.6, "grad_norm": 0.2890625, "learning_rate": 4.849874381222313e-05, "loss": 2.2464, "step": 4488 }, { "epoch": 0.6, "grad_norm": 0.30859375, "learning_rate": 4.849802129365989e-05, "loss": 2.2783, "step": 4489 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.8497298606657616e-05, "loss": 2.2466, "step": 4490 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.849657575122148e-05, "loss": 2.275, "step": 4491 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.849585272735666e-05, "loss": 2.2596, "step": 4492 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.849512953506835e-05, "loss": 2.2616, "step": 4493 }, { "epoch": 0.6, "grad_norm": 0.28515625, "learning_rate": 4.849440617436173e-05, "loss": 2.2572, "step": 4494 }, { "epoch": 0.6, "grad_norm": 0.291015625, "learning_rate": 4.849368264524199e-05, "loss": 2.2396, "step": 4495 }, { "epoch": 0.6, "grad_norm": 0.28515625, "learning_rate": 4.84929589477143e-05, "loss": 2.225, "step": 4496 }, { "epoch": 0.6, "grad_norm": 0.328125, "learning_rate": 4.849223508178386e-05, "loss": 2.2526, "step": 4497 }, { "epoch": 0.6, "grad_norm": 0.27734375, "learning_rate": 4.849151104745586e-05, "loss": 2.2589, "step": 4498 }, { "epoch": 0.6, "grad_norm": 0.2890625, "learning_rate": 4.8490786844735484e-05, "loss": 2.2744, "step": 4499 }, { "epoch": 0.6, "grad_norm": 0.310546875, "learning_rate": 4.8490062473627927e-05, "loss": 2.248, "step": 4500 }, { "epoch": 0.6, "grad_norm": 0.29296875, "learning_rate": 4.8489337934138385e-05, "loss": 2.2536, "step": 4501 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.848861322627204e-05, "loss": 2.2342, "step": 4502 }, { "epoch": 0.6, "grad_norm": 0.283203125, "learning_rate": 4.8487888350034096e-05, "loss": 2.259, "step": 4503 }, { "epoch": 0.6, "grad_norm": 0.294921875, "learning_rate": 4.848716330542975e-05, "loss": 2.2912, "step": 4504 }, { "epoch": 0.6, "grad_norm": 0.30859375, "learning_rate": 4.84864380924642e-05, "loss": 2.2786, "step": 4505 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.8485712711142633e-05, "loss": 2.2678, "step": 4506 }, { "epoch": 0.6, "grad_norm": 0.27734375, "learning_rate": 4.8484987161470255e-05, "loss": 2.2736, "step": 4507 }, { "epoch": 0.6, "grad_norm": 0.298828125, "learning_rate": 4.848426144345228e-05, "loss": 2.2685, "step": 4508 }, { "epoch": 0.6, "grad_norm": 0.2890625, "learning_rate": 4.848353555709388e-05, "loss": 2.2684, "step": 4509 }, { "epoch": 0.6, "grad_norm": 0.30859375, "learning_rate": 4.8482809502400294e-05, "loss": 2.3206, "step": 4510 }, { "epoch": 0.6, "grad_norm": 0.294921875, "learning_rate": 4.84820832793767e-05, "loss": 2.2666, "step": 4511 }, { "epoch": 0.6, "grad_norm": 0.310546875, "learning_rate": 4.8481356888028325e-05, "loss": 2.248, "step": 4512 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.848063032836035e-05, "loss": 2.2804, "step": 4513 }, { "epoch": 0.6, "grad_norm": 0.28125, "learning_rate": 4.847990360037801e-05, "loss": 2.267, "step": 4514 }, { "epoch": 0.6, "grad_norm": 0.28515625, "learning_rate": 4.847917670408649e-05, "loss": 2.2551, "step": 4515 }, { "epoch": 0.6, "grad_norm": 0.302734375, "learning_rate": 4.847844963949102e-05, "loss": 2.2614, "step": 4516 }, { "epoch": 0.6, "grad_norm": 0.271484375, "learning_rate": 4.8477722406596806e-05, "loss": 2.2764, "step": 4517 }, { "epoch": 0.6, "grad_norm": 0.30859375, "learning_rate": 4.8476995005409054e-05, "loss": 2.2576, "step": 4518 }, { "epoch": 0.6, "grad_norm": 0.296875, "learning_rate": 4.847626743593298e-05, "loss": 2.3112, "step": 4519 }, { "epoch": 0.6, "grad_norm": 0.287109375, "learning_rate": 4.8475539698173805e-05, "loss": 2.2704, "step": 4520 }, { "epoch": 0.6, "grad_norm": 0.294921875, "learning_rate": 4.8474811792136744e-05, "loss": 2.2877, "step": 4521 }, { "epoch": 0.6, "grad_norm": 0.302734375, "learning_rate": 4.847408371782701e-05, "loss": 2.2938, "step": 4522 }, { "epoch": 0.6, "grad_norm": 0.294921875, "learning_rate": 4.847335547524983e-05, "loss": 2.2702, "step": 4523 }, { "epoch": 0.6, "grad_norm": 0.3125, "learning_rate": 4.8472627064410426e-05, "loss": 2.2526, "step": 4524 }, { "epoch": 0.6, "grad_norm": 0.283203125, "learning_rate": 4.8471898485314006e-05, "loss": 2.2785, "step": 4525 }, { "epoch": 0.6, "grad_norm": 0.318359375, "learning_rate": 4.847116973796579e-05, "loss": 2.224, "step": 4526 }, { "epoch": 0.6, "grad_norm": 0.294921875, "learning_rate": 4.847044082237103e-05, "loss": 2.2657, "step": 4527 }, { "epoch": 0.6, "grad_norm": 0.330078125, "learning_rate": 4.846971173853493e-05, "loss": 2.2444, "step": 4528 }, { "epoch": 0.6, "grad_norm": 0.2890625, "learning_rate": 4.846898248646271e-05, "loss": 2.2827, "step": 4529 }, { "epoch": 0.6, "grad_norm": 0.302734375, "learning_rate": 4.8468253066159605e-05, "loss": 2.2489, "step": 4530 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.846752347763086e-05, "loss": 2.263, "step": 4531 }, { "epoch": 0.6, "grad_norm": 0.3046875, "learning_rate": 4.8466793720881677e-05, "loss": 2.2729, "step": 4532 }, { "epoch": 0.6, "grad_norm": 0.29296875, "learning_rate": 4.8466063795917306e-05, "loss": 2.2677, "step": 4533 }, { "epoch": 0.6, "grad_norm": 0.30078125, "learning_rate": 4.8465333702742974e-05, "loss": 2.2492, "step": 4534 }, { "epoch": 0.6, "grad_norm": 0.310546875, "learning_rate": 4.846460344136391e-05, "loss": 2.2837, "step": 4535 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.8463873011785354e-05, "loss": 2.2463, "step": 4536 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.846314241401254e-05, "loss": 2.242, "step": 4537 }, { "epoch": 0.61, "grad_norm": 0.283203125, "learning_rate": 4.846241164805071e-05, "loss": 2.2768, "step": 4538 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.8461680713905086e-05, "loss": 2.2668, "step": 4539 }, { "epoch": 0.61, "grad_norm": 0.296875, "learning_rate": 4.846094961158093e-05, "loss": 2.2764, "step": 4540 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.8460218341083474e-05, "loss": 2.2886, "step": 4541 }, { "epoch": 0.61, "grad_norm": 0.310546875, "learning_rate": 4.845948690241795e-05, "loss": 2.265, "step": 4542 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.845875529558962e-05, "loss": 2.2561, "step": 4543 }, { "epoch": 0.61, "grad_norm": 0.318359375, "learning_rate": 4.84580235206037e-05, "loss": 2.2927, "step": 4544 }, { "epoch": 0.61, "grad_norm": 0.310546875, "learning_rate": 4.845729157746547e-05, "loss": 2.2788, "step": 4545 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.845655946618015e-05, "loss": 2.2851, "step": 4546 }, { "epoch": 0.61, "grad_norm": 0.31640625, "learning_rate": 4.8455827186753e-05, "loss": 2.2805, "step": 4547 }, { "epoch": 0.61, "grad_norm": 0.291015625, "learning_rate": 4.845509473918927e-05, "loss": 2.2503, "step": 4548 }, { "epoch": 0.61, "grad_norm": 0.28125, "learning_rate": 4.84543621234942e-05, "loss": 2.2755, "step": 4549 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.845362933967305e-05, "loss": 2.2488, "step": 4550 }, { "epoch": 0.61, "grad_norm": 0.283203125, "learning_rate": 4.8452896387731074e-05, "loss": 2.2531, "step": 4551 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.845216326767352e-05, "loss": 2.2373, "step": 4552 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 4.8451429979505655e-05, "loss": 2.3334, "step": 4553 }, { "epoch": 0.61, "grad_norm": 0.302734375, "learning_rate": 4.845069652323272e-05, "loss": 2.2847, "step": 4554 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.844996289885998e-05, "loss": 2.2795, "step": 4555 }, { "epoch": 0.61, "grad_norm": 0.287109375, "learning_rate": 4.844922910639269e-05, "loss": 2.247, "step": 4556 }, { "epoch": 0.61, "grad_norm": 0.294921875, "learning_rate": 4.8448495145836124e-05, "loss": 2.2685, "step": 4557 }, { "epoch": 0.61, "grad_norm": 0.291015625, "learning_rate": 4.844776101719553e-05, "loss": 2.268, "step": 4558 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.8447026720476166e-05, "loss": 2.2814, "step": 4559 }, { "epoch": 0.61, "grad_norm": 0.2890625, "learning_rate": 4.84462922556833e-05, "loss": 2.2812, "step": 4560 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.84455576228222e-05, "loss": 2.2936, "step": 4561 }, { "epoch": 0.61, "grad_norm": 0.294921875, "learning_rate": 4.844482282189814e-05, "loss": 2.2594, "step": 4562 }, { "epoch": 0.61, "grad_norm": 0.314453125, "learning_rate": 4.8444087852916374e-05, "loss": 2.263, "step": 4563 }, { "epoch": 0.61, "grad_norm": 0.302734375, "learning_rate": 4.8443352715882174e-05, "loss": 2.2748, "step": 4564 }, { "epoch": 0.61, "grad_norm": 0.291015625, "learning_rate": 4.8442617410800804e-05, "loss": 2.2512, "step": 4565 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.844188193767755e-05, "loss": 2.2746, "step": 4566 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 4.844114629651768e-05, "loss": 2.2289, "step": 4567 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.8440410487326446e-05, "loss": 2.2938, "step": 4568 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 4.843967451010916e-05, "loss": 2.2484, "step": 4569 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.8438938364871055e-05, "loss": 2.2538, "step": 4570 }, { "epoch": 0.61, "grad_norm": 0.302734375, "learning_rate": 4.843820205161744e-05, "loss": 2.2891, "step": 4571 }, { "epoch": 0.61, "grad_norm": 0.2890625, "learning_rate": 4.843746557035359e-05, "loss": 2.2444, "step": 4572 }, { "epoch": 0.61, "grad_norm": 0.27734375, "learning_rate": 4.843672892108477e-05, "loss": 2.2623, "step": 4573 }, { "epoch": 0.61, "grad_norm": 0.28515625, "learning_rate": 4.843599210381626e-05, "loss": 2.2662, "step": 4574 }, { "epoch": 0.61, "grad_norm": 0.296875, "learning_rate": 4.843525511855336e-05, "loss": 2.2891, "step": 4575 }, { "epoch": 0.61, "grad_norm": 0.294921875, "learning_rate": 4.843451796530134e-05, "loss": 2.2896, "step": 4576 }, { "epoch": 0.61, "grad_norm": 0.296875, "learning_rate": 4.8433780644065485e-05, "loss": 2.2902, "step": 4577 }, { "epoch": 0.61, "grad_norm": 0.302734375, "learning_rate": 4.843304315485107e-05, "loss": 2.2643, "step": 4578 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.843230549766341e-05, "loss": 2.2338, "step": 4579 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.843156767250777e-05, "loss": 2.279, "step": 4580 }, { "epoch": 0.61, "grad_norm": 0.30859375, "learning_rate": 4.8430829679389436e-05, "loss": 2.2559, "step": 4581 }, { "epoch": 0.61, "grad_norm": 0.283203125, "learning_rate": 4.8430091518313715e-05, "loss": 2.2659, "step": 4582 }, { "epoch": 0.61, "grad_norm": 0.302734375, "learning_rate": 4.842935318928589e-05, "loss": 2.2403, "step": 4583 }, { "epoch": 0.61, "grad_norm": 0.30859375, "learning_rate": 4.842861469231125e-05, "loss": 2.2355, "step": 4584 }, { "epoch": 0.61, "grad_norm": 0.30859375, "learning_rate": 4.842787602739509e-05, "loss": 2.2808, "step": 4585 }, { "epoch": 0.61, "grad_norm": 0.27734375, "learning_rate": 4.84271371945427e-05, "loss": 2.2678, "step": 4586 }, { "epoch": 0.61, "grad_norm": 0.28515625, "learning_rate": 4.8426398193759406e-05, "loss": 2.2547, "step": 4587 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.8425659025050465e-05, "loss": 2.2491, "step": 4588 }, { "epoch": 0.61, "grad_norm": 0.294921875, "learning_rate": 4.842491968842119e-05, "loss": 2.2756, "step": 4589 }, { "epoch": 0.61, "grad_norm": 0.30859375, "learning_rate": 4.8424180183876895e-05, "loss": 2.2742, "step": 4590 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.8423440511422865e-05, "loss": 2.2884, "step": 4591 }, { "epoch": 0.61, "grad_norm": 0.310546875, "learning_rate": 4.84227006710644e-05, "loss": 2.2615, "step": 4592 }, { "epoch": 0.61, "grad_norm": 0.294921875, "learning_rate": 4.842196066280682e-05, "loss": 2.2723, "step": 4593 }, { "epoch": 0.61, "grad_norm": 0.30078125, "learning_rate": 4.842122048665542e-05, "loss": 2.2764, "step": 4594 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.842048014261549e-05, "loss": 2.2888, "step": 4595 }, { "epoch": 0.61, "grad_norm": 0.2890625, "learning_rate": 4.8419739630692364e-05, "loss": 2.2743, "step": 4596 }, { "epoch": 0.61, "grad_norm": 0.291015625, "learning_rate": 4.8418998950891336e-05, "loss": 2.2275, "step": 4597 }, { "epoch": 0.61, "grad_norm": 0.314453125, "learning_rate": 4.8418258103217716e-05, "loss": 2.2907, "step": 4598 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.841751708767682e-05, "loss": 2.2871, "step": 4599 }, { "epoch": 0.61, "grad_norm": 0.296875, "learning_rate": 4.841677590427396e-05, "loss": 2.2459, "step": 4600 }, { "epoch": 0.61, "grad_norm": 0.291015625, "learning_rate": 4.841603455301443e-05, "loss": 2.2308, "step": 4601 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 4.8415293033903576e-05, "loss": 2.2766, "step": 4602 }, { "epoch": 0.61, "grad_norm": 0.298828125, "learning_rate": 4.841455134694669e-05, "loss": 2.2838, "step": 4603 }, { "epoch": 0.61, "grad_norm": 0.287109375, "learning_rate": 4.841380949214909e-05, "loss": 2.2397, "step": 4604 }, { "epoch": 0.61, "grad_norm": 0.29296875, "learning_rate": 4.8413067469516104e-05, "loss": 2.2854, "step": 4605 }, { "epoch": 0.61, "grad_norm": 0.283203125, "learning_rate": 4.841232527905305e-05, "loss": 2.2528, "step": 4606 }, { "epoch": 0.61, "grad_norm": 0.322265625, "learning_rate": 4.8411582920765234e-05, "loss": 2.2739, "step": 4607 }, { "epoch": 0.61, "grad_norm": 0.3046875, "learning_rate": 4.8410840394658e-05, "loss": 2.268, "step": 4608 }, { "epoch": 0.61, "grad_norm": 0.31640625, "learning_rate": 4.8410097700736647e-05, "loss": 2.3019, "step": 4609 }, { "epoch": 0.61, "grad_norm": 0.30859375, "learning_rate": 4.840935483900651e-05, "loss": 2.2696, "step": 4610 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.840861180947292e-05, "loss": 2.2758, "step": 4611 }, { "epoch": 0.62, "grad_norm": 0.294921875, "learning_rate": 4.840786861214119e-05, "loss": 2.2394, "step": 4612 }, { "epoch": 0.62, "grad_norm": 0.3046875, "learning_rate": 4.840712524701666e-05, "loss": 2.2792, "step": 4613 }, { "epoch": 0.62, "grad_norm": 0.283203125, "learning_rate": 4.840638171410465e-05, "loss": 2.2827, "step": 4614 }, { "epoch": 0.62, "grad_norm": 0.3046875, "learning_rate": 4.840563801341049e-05, "loss": 2.2775, "step": 4615 }, { "epoch": 0.62, "grad_norm": 0.306640625, "learning_rate": 4.8404894144939526e-05, "loss": 2.2664, "step": 4616 }, { "epoch": 0.62, "grad_norm": 0.287109375, "learning_rate": 4.8404150108697065e-05, "loss": 2.2372, "step": 4617 }, { "epoch": 0.62, "grad_norm": 0.314453125, "learning_rate": 4.840340590468846e-05, "loss": 2.2849, "step": 4618 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.8402661532919036e-05, "loss": 2.2519, "step": 4619 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.8401916993394134e-05, "loss": 2.2598, "step": 4620 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.8401172286119087e-05, "loss": 2.2443, "step": 4621 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.840042741109924e-05, "loss": 2.2546, "step": 4622 }, { "epoch": 0.62, "grad_norm": 0.302734375, "learning_rate": 4.839968236833993e-05, "loss": 2.2555, "step": 4623 }, { "epoch": 0.62, "grad_norm": 0.291015625, "learning_rate": 4.839893715784648e-05, "loss": 2.2695, "step": 4624 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.839819177962426e-05, "loss": 2.2467, "step": 4625 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.83974462336786e-05, "loss": 2.2885, "step": 4626 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.8396700520014835e-05, "loss": 2.292, "step": 4627 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.839595463863833e-05, "loss": 2.2813, "step": 4628 }, { "epoch": 0.62, "grad_norm": 0.314453125, "learning_rate": 4.839520858955442e-05, "loss": 2.2452, "step": 4629 }, { "epoch": 0.62, "grad_norm": 0.287109375, "learning_rate": 4.839446237276845e-05, "loss": 2.2676, "step": 4630 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.839371598828577e-05, "loss": 2.2565, "step": 4631 }, { "epoch": 0.62, "grad_norm": 0.291015625, "learning_rate": 4.839296943611174e-05, "loss": 2.2574, "step": 4632 }, { "epoch": 0.62, "grad_norm": 0.306640625, "learning_rate": 4.8392222716251705e-05, "loss": 2.2494, "step": 4633 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.839147582871102e-05, "loss": 2.2708, "step": 4634 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.839072877349503e-05, "loss": 2.2128, "step": 4635 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.8389981550609096e-05, "loss": 2.2804, "step": 4636 }, { "epoch": 0.62, "grad_norm": 0.302734375, "learning_rate": 4.8389234160058585e-05, "loss": 2.2838, "step": 4637 }, { "epoch": 0.62, "grad_norm": 0.306640625, "learning_rate": 4.838848660184883e-05, "loss": 2.2516, "step": 4638 }, { "epoch": 0.62, "grad_norm": 0.314453125, "learning_rate": 4.838773887598521e-05, "loss": 2.2784, "step": 4639 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.8386990982473084e-05, "loss": 2.2789, "step": 4640 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.83862429213178e-05, "loss": 2.261, "step": 4641 }, { "epoch": 0.62, "grad_norm": 0.3046875, "learning_rate": 4.8385494692524735e-05, "loss": 2.2662, "step": 4642 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.838474629609925e-05, "loss": 2.264, "step": 4643 }, { "epoch": 0.62, "grad_norm": 0.3046875, "learning_rate": 4.8383997732046695e-05, "loss": 2.245, "step": 4644 }, { "epoch": 0.62, "grad_norm": 0.306640625, "learning_rate": 4.8383249000372446e-05, "loss": 2.2706, "step": 4645 }, { "epoch": 0.62, "grad_norm": 0.294921875, "learning_rate": 4.8382500101081876e-05, "loss": 2.2631, "step": 4646 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.8381751034180345e-05, "loss": 2.2361, "step": 4647 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.838100179967322e-05, "loss": 2.2121, "step": 4648 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.838025239756588e-05, "loss": 2.2772, "step": 4649 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.8379502827863696e-05, "loss": 2.2436, "step": 4650 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.8378753090572044e-05, "loss": 2.2527, "step": 4651 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.8378003185696285e-05, "loss": 2.2418, "step": 4652 }, { "epoch": 0.62, "grad_norm": 0.302734375, "learning_rate": 4.8377253113241807e-05, "loss": 2.2751, "step": 4653 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.837650287321398e-05, "loss": 2.2646, "step": 4654 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.8375752465618186e-05, "loss": 2.2676, "step": 4655 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.837500189045979e-05, "loss": 2.2427, "step": 4656 }, { "epoch": 0.62, "grad_norm": 0.291015625, "learning_rate": 4.83742511477442e-05, "loss": 2.2726, "step": 4657 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.837350023747678e-05, "loss": 2.2764, "step": 4658 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.8372749159662904e-05, "loss": 2.2585, "step": 4659 }, { "epoch": 0.62, "grad_norm": 0.31640625, "learning_rate": 4.837199791430798e-05, "loss": 2.2748, "step": 4660 }, { "epoch": 0.62, "grad_norm": 0.28515625, "learning_rate": 4.837124650141737e-05, "loss": 2.2773, "step": 4661 }, { "epoch": 0.62, "grad_norm": 0.294921875, "learning_rate": 4.8370494920996465e-05, "loss": 2.2804, "step": 4662 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.836974317305067e-05, "loss": 2.3045, "step": 4663 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.8368991257585354e-05, "loss": 2.2404, "step": 4664 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.836823917460592e-05, "loss": 2.2986, "step": 4665 }, { "epoch": 0.62, "grad_norm": 0.302734375, "learning_rate": 4.8367486924117745e-05, "loss": 2.2717, "step": 4666 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.8366734506126234e-05, "loss": 2.256, "step": 4667 }, { "epoch": 0.62, "grad_norm": 0.294921875, "learning_rate": 4.836598192063677e-05, "loss": 2.2755, "step": 4668 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.8365229167654764e-05, "loss": 2.2611, "step": 4669 }, { "epoch": 0.62, "grad_norm": 0.3046875, "learning_rate": 4.8364476247185595e-05, "loss": 2.2781, "step": 4670 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.836372315923466e-05, "loss": 2.2622, "step": 4671 }, { "epoch": 0.62, "grad_norm": 0.283203125, "learning_rate": 4.8362969903807376e-05, "loss": 2.3047, "step": 4672 }, { "epoch": 0.62, "grad_norm": 0.29296875, "learning_rate": 4.836221648090913e-05, "loss": 2.2496, "step": 4673 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.836146289054532e-05, "loss": 2.2955, "step": 4674 }, { "epoch": 0.62, "grad_norm": 0.322265625, "learning_rate": 4.8360709132721346e-05, "loss": 2.2578, "step": 4675 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.835995520744262e-05, "loss": 2.269, "step": 4676 }, { "epoch": 0.62, "grad_norm": 0.283203125, "learning_rate": 4.8359201114714545e-05, "loss": 2.2715, "step": 4677 }, { "epoch": 0.62, "grad_norm": 0.318359375, "learning_rate": 4.835844685454252e-05, "loss": 2.2729, "step": 4678 }, { "epoch": 0.62, "grad_norm": 0.296875, "learning_rate": 4.835769242693195e-05, "loss": 2.2557, "step": 4679 }, { "epoch": 0.62, "grad_norm": 0.298828125, "learning_rate": 4.835693783188826e-05, "loss": 2.2548, "step": 4680 }, { "epoch": 0.62, "grad_norm": 0.30859375, "learning_rate": 4.835618306941684e-05, "loss": 2.2522, "step": 4681 }, { "epoch": 0.62, "grad_norm": 0.287109375, "learning_rate": 4.835542813952311e-05, "loss": 2.2864, "step": 4682 }, { "epoch": 0.62, "grad_norm": 0.2890625, "learning_rate": 4.835467304221248e-05, "loss": 2.2915, "step": 4683 }, { "epoch": 0.62, "grad_norm": 0.302734375, "learning_rate": 4.835391777749037e-05, "loss": 2.2688, "step": 4684 }, { "epoch": 0.62, "grad_norm": 0.287109375, "learning_rate": 4.8353162345362166e-05, "loss": 2.2321, "step": 4685 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.835240674583332e-05, "loss": 2.2732, "step": 4686 }, { "epoch": 0.63, "grad_norm": 0.279296875, "learning_rate": 4.8351650978909224e-05, "loss": 2.2397, "step": 4687 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.835089504459531e-05, "loss": 2.272, "step": 4688 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.8350138942896986e-05, "loss": 2.2784, "step": 4689 }, { "epoch": 0.63, "grad_norm": 0.298828125, "learning_rate": 4.834938267381966e-05, "loss": 2.2568, "step": 4690 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.834862623736879e-05, "loss": 2.2804, "step": 4691 }, { "epoch": 0.63, "grad_norm": 0.29296875, "learning_rate": 4.834786963354977e-05, "loss": 2.2753, "step": 4692 }, { "epoch": 0.63, "grad_norm": 0.29296875, "learning_rate": 4.8347112862368034e-05, "loss": 2.2836, "step": 4693 }, { "epoch": 0.63, "grad_norm": 0.29296875, "learning_rate": 4.8346355923829e-05, "loss": 2.2864, "step": 4694 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.834559881793809e-05, "loss": 2.2736, "step": 4695 }, { "epoch": 0.63, "grad_norm": 0.3125, "learning_rate": 4.834484154470075e-05, "loss": 2.2813, "step": 4696 }, { "epoch": 0.63, "grad_norm": 0.298828125, "learning_rate": 4.834408410412239e-05, "loss": 2.3213, "step": 4697 }, { "epoch": 0.63, "grad_norm": 0.298828125, "learning_rate": 4.834332649620845e-05, "loss": 2.2554, "step": 4698 }, { "epoch": 0.63, "grad_norm": 0.27734375, "learning_rate": 4.834256872096435e-05, "loss": 2.2606, "step": 4699 }, { "epoch": 0.63, "grad_norm": 0.298828125, "learning_rate": 4.834181077839553e-05, "loss": 2.2738, "step": 4700 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.8341052668507424e-05, "loss": 2.2905, "step": 4701 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.834029439130546e-05, "loss": 2.2656, "step": 4702 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.833953594679508e-05, "loss": 2.2896, "step": 4703 }, { "epoch": 0.63, "grad_norm": 0.298828125, "learning_rate": 4.833877733498172e-05, "loss": 2.2679, "step": 4704 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.8338018555870826e-05, "loss": 2.2441, "step": 4705 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.833725960946781e-05, "loss": 2.2429, "step": 4706 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.8336500495778136e-05, "loss": 2.23, "step": 4707 }, { "epoch": 0.63, "grad_norm": 0.296875, "learning_rate": 4.833574121480724e-05, "loss": 2.2589, "step": 4708 }, { "epoch": 0.63, "grad_norm": 0.296875, "learning_rate": 4.833498176656056e-05, "loss": 2.275, "step": 4709 }, { "epoch": 0.63, "grad_norm": 0.3046875, "learning_rate": 4.833422215104355e-05, "loss": 2.2773, "step": 4710 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 4.833346236826164e-05, "loss": 2.2487, "step": 4711 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.8332702418220286e-05, "loss": 2.2473, "step": 4712 }, { "epoch": 0.63, "grad_norm": 0.28125, "learning_rate": 4.833194230092494e-05, "loss": 2.2579, "step": 4713 }, { "epoch": 0.63, "grad_norm": 0.29296875, "learning_rate": 4.833118201638105e-05, "loss": 2.2415, "step": 4714 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.833042156459405e-05, "loss": 2.2645, "step": 4715 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.8329660945569404e-05, "loss": 2.2728, "step": 4716 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.832890015931256e-05, "loss": 2.3087, "step": 4717 }, { "epoch": 0.63, "grad_norm": 0.296875, "learning_rate": 4.832813920582898e-05, "loss": 2.267, "step": 4718 }, { "epoch": 0.63, "grad_norm": 0.310546875, "learning_rate": 4.8327378085124115e-05, "loss": 2.2509, "step": 4719 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.832661679720342e-05, "loss": 2.2646, "step": 4720 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.832585534207233e-05, "loss": 2.3336, "step": 4721 }, { "epoch": 0.63, "grad_norm": 0.27734375, "learning_rate": 4.832509371973635e-05, "loss": 2.266, "step": 4722 }, { "epoch": 0.63, "grad_norm": 0.279296875, "learning_rate": 4.8324331930200895e-05, "loss": 2.2477, "step": 4723 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 4.832356997347145e-05, "loss": 2.2451, "step": 4724 }, { "epoch": 0.63, "grad_norm": 0.314453125, "learning_rate": 4.832280784955348e-05, "loss": 2.2836, "step": 4725 }, { "epoch": 0.63, "grad_norm": 0.30078125, "learning_rate": 4.832204555845242e-05, "loss": 2.2726, "step": 4726 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.832128310017376e-05, "loss": 2.2829, "step": 4727 }, { "epoch": 0.63, "grad_norm": 0.287109375, "learning_rate": 4.832052047472297e-05, "loss": 2.2487, "step": 4728 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.8319757682105495e-05, "loss": 2.2591, "step": 4729 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.8318994722326814e-05, "loss": 2.2437, "step": 4730 }, { "epoch": 0.63, "grad_norm": 0.28515625, "learning_rate": 4.83182315953924e-05, "loss": 2.2492, "step": 4731 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 4.831746830130771e-05, "loss": 2.2629, "step": 4732 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 4.831670484007823e-05, "loss": 2.2563, "step": 4733 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.831594121170943e-05, "loss": 2.2387, "step": 4734 }, { "epoch": 0.63, "grad_norm": 0.30078125, "learning_rate": 4.8315177416206774e-05, "loss": 2.255, "step": 4735 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.831441345357574e-05, "loss": 2.2736, "step": 4736 }, { "epoch": 0.63, "grad_norm": 0.30859375, "learning_rate": 4.831364932382182e-05, "loss": 2.2972, "step": 4737 }, { "epoch": 0.63, "grad_norm": 0.30859375, "learning_rate": 4.831288502695047e-05, "loss": 2.2859, "step": 4738 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.831212056296718e-05, "loss": 2.2702, "step": 4739 }, { "epoch": 0.63, "grad_norm": 0.28515625, "learning_rate": 4.8311355931877435e-05, "loss": 2.2482, "step": 4740 }, { "epoch": 0.63, "grad_norm": 0.32421875, "learning_rate": 4.83105911336867e-05, "loss": 2.2703, "step": 4741 }, { "epoch": 0.63, "grad_norm": 0.3046875, "learning_rate": 4.8309826168400464e-05, "loss": 2.2728, "step": 4742 }, { "epoch": 0.63, "grad_norm": 0.28125, "learning_rate": 4.830906103602422e-05, "loss": 2.2696, "step": 4743 }, { "epoch": 0.63, "grad_norm": 0.322265625, "learning_rate": 4.830829573656344e-05, "loss": 2.326, "step": 4744 }, { "epoch": 0.63, "grad_norm": 0.291015625, "learning_rate": 4.8307530270023614e-05, "loss": 2.269, "step": 4745 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.8306764636410234e-05, "loss": 2.2661, "step": 4746 }, { "epoch": 0.63, "grad_norm": 0.30859375, "learning_rate": 4.830599883572878e-05, "loss": 2.2634, "step": 4747 }, { "epoch": 0.63, "grad_norm": 0.30078125, "learning_rate": 4.830523286798475e-05, "loss": 2.2815, "step": 4748 }, { "epoch": 0.63, "grad_norm": 0.310546875, "learning_rate": 4.830446673318363e-05, "loss": 2.2713, "step": 4749 }, { "epoch": 0.63, "grad_norm": 0.296875, "learning_rate": 4.830370043133091e-05, "loss": 2.2596, "step": 4750 }, { "epoch": 0.63, "grad_norm": 0.318359375, "learning_rate": 4.8302933962432084e-05, "loss": 2.2667, "step": 4751 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.8302167326492654e-05, "loss": 2.2852, "step": 4752 }, { "epoch": 0.63, "grad_norm": 0.30859375, "learning_rate": 4.8301400523518096e-05, "loss": 2.2244, "step": 4753 }, { "epoch": 0.63, "grad_norm": 0.294921875, "learning_rate": 4.830063355351393e-05, "loss": 2.2827, "step": 4754 }, { "epoch": 0.63, "grad_norm": 0.29296875, "learning_rate": 4.829986641648564e-05, "loss": 2.2837, "step": 4755 }, { "epoch": 0.63, "grad_norm": 0.2890625, "learning_rate": 4.829909911243873e-05, "loss": 2.2825, "step": 4756 }, { "epoch": 0.63, "grad_norm": 0.302734375, "learning_rate": 4.829833164137869e-05, "loss": 2.2486, "step": 4757 }, { "epoch": 0.63, "grad_norm": 0.283203125, "learning_rate": 4.829756400331104e-05, "loss": 2.2811, "step": 4758 }, { "epoch": 0.63, "grad_norm": 0.28515625, "learning_rate": 4.829679619824127e-05, "loss": 2.2641, "step": 4759 }, { "epoch": 0.63, "grad_norm": 0.28515625, "learning_rate": 4.829602822617488e-05, "loss": 2.2796, "step": 4760 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.829526008711739e-05, "loss": 2.2371, "step": 4761 }, { "epoch": 0.64, "grad_norm": 0.30859375, "learning_rate": 4.8294491781074294e-05, "loss": 2.2311, "step": 4762 }, { "epoch": 0.64, "grad_norm": 0.322265625, "learning_rate": 4.8293723308051095e-05, "loss": 2.27, "step": 4763 }, { "epoch": 0.64, "grad_norm": 0.291015625, "learning_rate": 4.829295466805333e-05, "loss": 2.2766, "step": 4764 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.829218586108647e-05, "loss": 2.257, "step": 4765 }, { "epoch": 0.64, "grad_norm": 0.30078125, "learning_rate": 4.8291416887156044e-05, "loss": 2.2686, "step": 4766 }, { "epoch": 0.64, "grad_norm": 0.31640625, "learning_rate": 4.829064774626757e-05, "loss": 2.2762, "step": 4767 }, { "epoch": 0.64, "grad_norm": 0.3046875, "learning_rate": 4.828987843842655e-05, "loss": 2.275, "step": 4768 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.828910896363851e-05, "loss": 2.2451, "step": 4769 }, { "epoch": 0.64, "grad_norm": 0.3046875, "learning_rate": 4.828833932190896e-05, "loss": 2.2343, "step": 4770 }, { "epoch": 0.64, "grad_norm": 0.306640625, "learning_rate": 4.8287569513243416e-05, "loss": 2.2497, "step": 4771 }, { "epoch": 0.64, "grad_norm": 0.302734375, "learning_rate": 4.82867995376474e-05, "loss": 2.2557, "step": 4772 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.828602939512642e-05, "loss": 2.286, "step": 4773 }, { "epoch": 0.64, "grad_norm": 0.3125, "learning_rate": 4.828525908568601e-05, "loss": 2.2837, "step": 4774 }, { "epoch": 0.64, "grad_norm": 0.30859375, "learning_rate": 4.828448860933169e-05, "loss": 2.2958, "step": 4775 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.8283717966068974e-05, "loss": 2.2459, "step": 4776 }, { "epoch": 0.64, "grad_norm": 0.2890625, "learning_rate": 4.8282947155903394e-05, "loss": 2.2573, "step": 4777 }, { "epoch": 0.64, "grad_norm": 0.30078125, "learning_rate": 4.8282176178840474e-05, "loss": 2.2376, "step": 4778 }, { "epoch": 0.64, "grad_norm": 0.28125, "learning_rate": 4.828140503488574e-05, "loss": 2.2774, "step": 4779 }, { "epoch": 0.64, "grad_norm": 0.310546875, "learning_rate": 4.828063372404472e-05, "loss": 2.282, "step": 4780 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.8279862246322935e-05, "loss": 2.2755, "step": 4781 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.8279090601725934e-05, "loss": 2.2901, "step": 4782 }, { "epoch": 0.64, "grad_norm": 0.3046875, "learning_rate": 4.827831879025923e-05, "loss": 2.2792, "step": 4783 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.8277546811928364e-05, "loss": 2.2737, "step": 4784 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.827677466673887e-05, "loss": 2.2997, "step": 4785 }, { "epoch": 0.64, "grad_norm": 0.2890625, "learning_rate": 4.827600235469628e-05, "loss": 2.2455, "step": 4786 }, { "epoch": 0.64, "grad_norm": 0.291015625, "learning_rate": 4.827522987580613e-05, "loss": 2.249, "step": 4787 }, { "epoch": 0.64, "grad_norm": 0.28125, "learning_rate": 4.827445723007396e-05, "loss": 2.2817, "step": 4788 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.8273684417505306e-05, "loss": 2.2691, "step": 4789 }, { "epoch": 0.64, "grad_norm": 0.28125, "learning_rate": 4.827291143810571e-05, "loss": 2.2733, "step": 4790 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.82721382918807e-05, "loss": 2.2714, "step": 4791 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.827136497883584e-05, "loss": 2.2872, "step": 4792 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.827059149897666e-05, "loss": 2.2505, "step": 4793 }, { "epoch": 0.64, "grad_norm": 0.291015625, "learning_rate": 4.8269817852308705e-05, "loss": 2.2761, "step": 4794 }, { "epoch": 0.64, "grad_norm": 0.28515625, "learning_rate": 4.826904403883753e-05, "loss": 2.265, "step": 4795 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.8268270058568674e-05, "loss": 2.2762, "step": 4796 }, { "epoch": 0.64, "grad_norm": 0.291015625, "learning_rate": 4.8267495911507684e-05, "loss": 2.3008, "step": 4797 }, { "epoch": 0.64, "grad_norm": 0.2890625, "learning_rate": 4.8266721597660106e-05, "loss": 2.3088, "step": 4798 }, { "epoch": 0.64, "grad_norm": 0.2890625, "learning_rate": 4.826594711703151e-05, "loss": 2.2591, "step": 4799 }, { "epoch": 0.64, "grad_norm": 0.310546875, "learning_rate": 4.826517246962742e-05, "loss": 2.2623, "step": 4800 }, { "epoch": 0.64, "grad_norm": 0.30078125, "learning_rate": 4.8264397655453404e-05, "loss": 2.2783, "step": 4801 }, { "epoch": 0.64, "grad_norm": 0.294921875, "learning_rate": 4.826362267451501e-05, "loss": 2.3056, "step": 4802 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.82628475268178e-05, "loss": 2.2995, "step": 4803 }, { "epoch": 0.64, "grad_norm": 0.298828125, "learning_rate": 4.826207221236734e-05, "loss": 2.2835, "step": 4804 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.826129673116916e-05, "loss": 2.2932, "step": 4805 }, { "epoch": 0.64, "grad_norm": 0.279296875, "learning_rate": 4.826052108322884e-05, "loss": 2.2658, "step": 4806 }, { "epoch": 0.64, "grad_norm": 0.294921875, "learning_rate": 4.8259745268551936e-05, "loss": 2.2755, "step": 4807 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.825896928714401e-05, "loss": 2.259, "step": 4808 }, { "epoch": 0.64, "grad_norm": 0.30859375, "learning_rate": 4.8258193139010615e-05, "loss": 2.2615, "step": 4809 }, { "epoch": 0.64, "grad_norm": 0.306640625, "learning_rate": 4.825741682415732e-05, "loss": 2.2729, "step": 4810 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.8256640342589696e-05, "loss": 2.2873, "step": 4811 }, { "epoch": 0.64, "grad_norm": 0.298828125, "learning_rate": 4.8255863694313305e-05, "loss": 2.2579, "step": 4812 }, { "epoch": 0.64, "grad_norm": 0.28125, "learning_rate": 4.825508687933371e-05, "loss": 2.2635, "step": 4813 }, { "epoch": 0.64, "grad_norm": 0.279296875, "learning_rate": 4.825430989765649e-05, "loss": 2.2694, "step": 4814 }, { "epoch": 0.64, "grad_norm": 0.314453125, "learning_rate": 4.82535327492872e-05, "loss": 2.268, "step": 4815 }, { "epoch": 0.64, "grad_norm": 0.294921875, "learning_rate": 4.8252755434231424e-05, "loss": 2.2723, "step": 4816 }, { "epoch": 0.64, "grad_norm": 0.3046875, "learning_rate": 4.825197795249472e-05, "loss": 2.2521, "step": 4817 }, { "epoch": 0.64, "grad_norm": 0.296875, "learning_rate": 4.8251200304082676e-05, "loss": 2.272, "step": 4818 }, { "epoch": 0.64, "grad_norm": 0.294921875, "learning_rate": 4.825042248900086e-05, "loss": 2.2469, "step": 4819 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.824964450725484e-05, "loss": 2.255, "step": 4820 }, { "epoch": 0.64, "grad_norm": 0.322265625, "learning_rate": 4.824886635885021e-05, "loss": 2.28, "step": 4821 }, { "epoch": 0.64, "grad_norm": 0.28125, "learning_rate": 4.8248088043792535e-05, "loss": 2.2813, "step": 4822 }, { "epoch": 0.64, "grad_norm": 0.294921875, "learning_rate": 4.82473095620874e-05, "loss": 2.2479, "step": 4823 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.824653091374037e-05, "loss": 2.274, "step": 4824 }, { "epoch": 0.64, "grad_norm": 0.28515625, "learning_rate": 4.824575209875705e-05, "loss": 2.2836, "step": 4825 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.8244973117143006e-05, "loss": 2.2843, "step": 4826 }, { "epoch": 0.64, "grad_norm": 0.29296875, "learning_rate": 4.8244193968903826e-05, "loss": 2.2728, "step": 4827 }, { "epoch": 0.64, "grad_norm": 0.3046875, "learning_rate": 4.824341465404511e-05, "loss": 2.2699, "step": 4828 }, { "epoch": 0.64, "grad_norm": 0.28515625, "learning_rate": 4.824263517257241e-05, "loss": 2.2729, "step": 4829 }, { "epoch": 0.64, "grad_norm": 0.306640625, "learning_rate": 4.8241855524491346e-05, "loss": 2.2816, "step": 4830 }, { "epoch": 0.64, "grad_norm": 0.287109375, "learning_rate": 4.824107570980749e-05, "loss": 2.2763, "step": 4831 }, { "epoch": 0.64, "grad_norm": 0.30078125, "learning_rate": 4.8240295728526444e-05, "loss": 2.2629, "step": 4832 }, { "epoch": 0.64, "grad_norm": 0.283203125, "learning_rate": 4.8239515580653796e-05, "loss": 2.259, "step": 4833 }, { "epoch": 0.64, "grad_norm": 0.291015625, "learning_rate": 4.8238735266195125e-05, "loss": 2.2652, "step": 4834 }, { "epoch": 0.64, "grad_norm": 0.30078125, "learning_rate": 4.8237954785156034e-05, "loss": 2.2612, "step": 4835 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.8237174137542127e-05, "loss": 2.2589, "step": 4836 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.823639332335898e-05, "loss": 2.2843, "step": 4837 }, { "epoch": 0.65, "grad_norm": 0.28125, "learning_rate": 4.8235612342612205e-05, "loss": 2.2424, "step": 4838 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.823483119530739e-05, "loss": 2.2475, "step": 4839 }, { "epoch": 0.65, "grad_norm": 0.31640625, "learning_rate": 4.823404988145015e-05, "loss": 2.2487, "step": 4840 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.8233268401046076e-05, "loss": 2.2594, "step": 4841 }, { "epoch": 0.65, "grad_norm": 0.27734375, "learning_rate": 4.823248675410076e-05, "loss": 2.2701, "step": 4842 }, { "epoch": 0.65, "grad_norm": 0.275390625, "learning_rate": 4.8231704940619814e-05, "loss": 2.2657, "step": 4843 }, { "epoch": 0.65, "grad_norm": 0.298828125, "learning_rate": 4.823092296060885e-05, "loss": 2.2778, "step": 4844 }, { "epoch": 0.65, "grad_norm": 0.51171875, "learning_rate": 4.8230140814073466e-05, "loss": 2.2942, "step": 4845 }, { "epoch": 0.65, "grad_norm": 0.2890625, "learning_rate": 4.8229358501019264e-05, "loss": 2.2889, "step": 4846 }, { "epoch": 0.65, "grad_norm": 0.30078125, "learning_rate": 4.822857602145186e-05, "loss": 2.2499, "step": 4847 }, { "epoch": 0.65, "grad_norm": 0.298828125, "learning_rate": 4.8227793375376854e-05, "loss": 2.2741, "step": 4848 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.8227010562799866e-05, "loss": 2.2715, "step": 4849 }, { "epoch": 0.65, "grad_norm": 0.2890625, "learning_rate": 4.8226227583726504e-05, "loss": 2.2293, "step": 4850 }, { "epoch": 0.65, "grad_norm": 0.298828125, "learning_rate": 4.822544443816238e-05, "loss": 2.25, "step": 4851 }, { "epoch": 0.65, "grad_norm": 0.3125, "learning_rate": 4.822466112611311e-05, "loss": 2.2334, "step": 4852 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.82238776475843e-05, "loss": 2.2644, "step": 4853 }, { "epoch": 0.65, "grad_norm": 0.28515625, "learning_rate": 4.822309400258157e-05, "loss": 2.298, "step": 4854 }, { "epoch": 0.65, "grad_norm": 0.291015625, "learning_rate": 4.8222310191110557e-05, "loss": 2.2594, "step": 4855 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.822152621317685e-05, "loss": 2.2606, "step": 4856 }, { "epoch": 0.65, "grad_norm": 0.30859375, "learning_rate": 4.8220742068786074e-05, "loss": 2.2228, "step": 4857 }, { "epoch": 0.65, "grad_norm": 0.283203125, "learning_rate": 4.821995775794387e-05, "loss": 2.2803, "step": 4858 }, { "epoch": 0.65, "grad_norm": 0.30078125, "learning_rate": 4.8219173280655836e-05, "loss": 2.234, "step": 4859 }, { "epoch": 0.65, "grad_norm": 0.291015625, "learning_rate": 4.821838863692762e-05, "loss": 2.2704, "step": 4860 }, { "epoch": 0.65, "grad_norm": 0.3203125, "learning_rate": 4.821760382676482e-05, "loss": 2.2506, "step": 4861 }, { "epoch": 0.65, "grad_norm": 0.314453125, "learning_rate": 4.8216818850173086e-05, "loss": 2.2692, "step": 4862 }, { "epoch": 0.65, "grad_norm": 0.318359375, "learning_rate": 4.821603370715802e-05, "loss": 2.262, "step": 4863 }, { "epoch": 0.65, "grad_norm": 0.2890625, "learning_rate": 4.821524839772528e-05, "loss": 2.2789, "step": 4864 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.8214462921880465e-05, "loss": 2.2461, "step": 4865 }, { "epoch": 0.65, "grad_norm": 0.27734375, "learning_rate": 4.8213677279629224e-05, "loss": 2.2595, "step": 4866 }, { "epoch": 0.65, "grad_norm": 0.28515625, "learning_rate": 4.821289147097718e-05, "loss": 2.2986, "step": 4867 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.8212105495929983e-05, "loss": 2.2584, "step": 4868 }, { "epoch": 0.65, "grad_norm": 0.31640625, "learning_rate": 4.8211319354493245e-05, "loss": 2.2611, "step": 4869 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.821053304667261e-05, "loss": 2.3083, "step": 4870 }, { "epoch": 0.65, "grad_norm": 0.302734375, "learning_rate": 4.8209746572473715e-05, "loss": 2.2535, "step": 4871 }, { "epoch": 0.65, "grad_norm": 0.28515625, "learning_rate": 4.82089599319022e-05, "loss": 2.28, "step": 4872 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.8208173124963695e-05, "loss": 2.2403, "step": 4873 }, { "epoch": 0.65, "grad_norm": 0.291015625, "learning_rate": 4.8207386151663855e-05, "loss": 2.2823, "step": 4874 }, { "epoch": 0.65, "grad_norm": 0.2890625, "learning_rate": 4.82065990120083e-05, "loss": 2.252, "step": 4875 }, { "epoch": 0.65, "grad_norm": 0.296875, "learning_rate": 4.8205811706002695e-05, "loss": 2.2706, "step": 4876 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.820502423365267e-05, "loss": 2.2596, "step": 4877 }, { "epoch": 0.65, "grad_norm": 0.3203125, "learning_rate": 4.820423659496388e-05, "loss": 2.2575, "step": 4878 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.820344878994195e-05, "loss": 2.2638, "step": 4879 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.820266081859255e-05, "loss": 2.2546, "step": 4880 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.8201872680921316e-05, "loss": 2.2396, "step": 4881 }, { "epoch": 0.65, "grad_norm": 0.298828125, "learning_rate": 4.82010843769339e-05, "loss": 2.2751, "step": 4882 }, { "epoch": 0.65, "grad_norm": 0.302734375, "learning_rate": 4.820029590663596e-05, "loss": 2.284, "step": 4883 }, { "epoch": 0.65, "grad_norm": 0.29296875, "learning_rate": 4.819950727003314e-05, "loss": 2.2464, "step": 4884 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.819871846713109e-05, "loss": 2.2662, "step": 4885 }, { "epoch": 0.65, "grad_norm": 0.298828125, "learning_rate": 4.8197929497935475e-05, "loss": 2.2587, "step": 4886 }, { "epoch": 0.65, "grad_norm": 0.31640625, "learning_rate": 4.819714036245194e-05, "loss": 2.2634, "step": 4887 }, { "epoch": 0.65, "grad_norm": 0.306640625, "learning_rate": 4.8196351060686154e-05, "loss": 2.2692, "step": 4888 }, { "epoch": 0.65, "grad_norm": 0.30859375, "learning_rate": 4.8195561592643755e-05, "loss": 2.2664, "step": 4889 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.819477195833042e-05, "loss": 2.2797, "step": 4890 }, { "epoch": 0.65, "grad_norm": 0.310546875, "learning_rate": 4.8193982157751806e-05, "loss": 2.2705, "step": 4891 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.819319219091357e-05, "loss": 2.2804, "step": 4892 }, { "epoch": 0.65, "grad_norm": 0.322265625, "learning_rate": 4.819240205782137e-05, "loss": 2.2602, "step": 4893 }, { "epoch": 0.65, "grad_norm": 0.3046875, "learning_rate": 4.8191611758480895e-05, "loss": 2.2757, "step": 4894 }, { "epoch": 0.65, "grad_norm": 0.30078125, "learning_rate": 4.8190821292897776e-05, "loss": 2.269, "step": 4895 }, { "epoch": 0.65, "grad_norm": 0.318359375, "learning_rate": 4.81900306610777e-05, "loss": 2.2825, "step": 4896 }, { "epoch": 0.65, "grad_norm": 0.2890625, "learning_rate": 4.818923986302633e-05, "loss": 2.2646, "step": 4897 }, { "epoch": 0.65, "grad_norm": 0.30078125, "learning_rate": 4.818844889874933e-05, "loss": 2.2865, "step": 4898 }, { "epoch": 0.65, "grad_norm": 0.74609375, "learning_rate": 4.818765776825237e-05, "loss": 2.2944, "step": 4899 }, { "epoch": 0.65, "grad_norm": 0.28515625, "learning_rate": 4.818686647154114e-05, "loss": 2.2362, "step": 4900 }, { "epoch": 0.65, "grad_norm": 0.2734375, "learning_rate": 4.818607500862128e-05, "loss": 2.2568, "step": 4901 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.818528337949849e-05, "loss": 2.2937, "step": 4902 }, { "epoch": 0.65, "grad_norm": 0.294921875, "learning_rate": 4.818449158417844e-05, "loss": 2.2841, "step": 4903 }, { "epoch": 0.65, "grad_norm": 0.3046875, "learning_rate": 4.818369962266679e-05, "loss": 2.2619, "step": 4904 }, { "epoch": 0.65, "grad_norm": 0.28515625, "learning_rate": 4.818290749496923e-05, "loss": 2.2879, "step": 4905 }, { "epoch": 0.65, "grad_norm": 0.306640625, "learning_rate": 4.818211520109143e-05, "loss": 2.2863, "step": 4906 }, { "epoch": 0.65, "grad_norm": 0.287109375, "learning_rate": 4.818132274103908e-05, "loss": 2.2642, "step": 4907 }, { "epoch": 0.65, "grad_norm": 0.296875, "learning_rate": 4.818053011481786e-05, "loss": 2.2591, "step": 4908 }, { "epoch": 0.65, "grad_norm": 0.28125, "learning_rate": 4.817973732243345e-05, "loss": 2.2658, "step": 4909 }, { "epoch": 0.65, "grad_norm": 0.28125, "learning_rate": 4.817894436389152e-05, "loss": 2.2489, "step": 4910 }, { "epoch": 0.66, "grad_norm": 0.3046875, "learning_rate": 4.8178151239197766e-05, "loss": 2.2602, "step": 4911 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.817735794835788e-05, "loss": 2.2772, "step": 4912 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.817656449137753e-05, "loss": 2.2353, "step": 4913 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.8175770868262424e-05, "loss": 2.2616, "step": 4914 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.8174977079018236e-05, "loss": 2.2505, "step": 4915 }, { "epoch": 0.66, "grad_norm": 0.28515625, "learning_rate": 4.817418312365066e-05, "loss": 2.2647, "step": 4916 }, { "epoch": 0.66, "grad_norm": 0.30078125, "learning_rate": 4.817338900216538e-05, "loss": 2.3092, "step": 4917 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.817259471456811e-05, "loss": 2.273, "step": 4918 }, { "epoch": 0.66, "grad_norm": 0.30078125, "learning_rate": 4.8171800260864516e-05, "loss": 2.268, "step": 4919 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.8171005641060315e-05, "loss": 2.2634, "step": 4920 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.81702108551612e-05, "loss": 2.2513, "step": 4921 }, { "epoch": 0.66, "grad_norm": 0.291015625, "learning_rate": 4.8169415903172855e-05, "loss": 2.2473, "step": 4922 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.8168620785100995e-05, "loss": 2.2565, "step": 4923 }, { "epoch": 0.66, "grad_norm": 0.291015625, "learning_rate": 4.81678255009513e-05, "loss": 2.253, "step": 4924 }, { "epoch": 0.66, "grad_norm": 0.27734375, "learning_rate": 4.816703005072949e-05, "loss": 2.2487, "step": 4925 }, { "epoch": 0.66, "grad_norm": 0.275390625, "learning_rate": 4.816623443444125e-05, "loss": 2.261, "step": 4926 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.81654386520923e-05, "loss": 2.269, "step": 4927 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.816464270368833e-05, "loss": 2.2615, "step": 4928 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.816384658923506e-05, "loss": 2.2662, "step": 4929 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.8163050308738186e-05, "loss": 2.3018, "step": 4930 }, { "epoch": 0.66, "grad_norm": 0.30078125, "learning_rate": 4.816225386220342e-05, "loss": 2.2889, "step": 4931 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.8161457249636465e-05, "loss": 2.2837, "step": 4932 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.816066047104304e-05, "loss": 2.3002, "step": 4933 }, { "epoch": 0.66, "grad_norm": 0.287109375, "learning_rate": 4.815986352642885e-05, "loss": 2.2639, "step": 4934 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.81590664157996e-05, "loss": 2.2576, "step": 4935 }, { "epoch": 0.66, "grad_norm": 0.314453125, "learning_rate": 4.8158269139161026e-05, "loss": 2.235, "step": 4936 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.815747169651883e-05, "loss": 2.2871, "step": 4937 }, { "epoch": 0.66, "grad_norm": 0.287109375, "learning_rate": 4.815667408787873e-05, "loss": 2.2614, "step": 4938 }, { "epoch": 0.66, "grad_norm": 0.30859375, "learning_rate": 4.8155876313246437e-05, "loss": 2.2315, "step": 4939 }, { "epoch": 0.66, "grad_norm": 0.3203125, "learning_rate": 4.8155078372627685e-05, "loss": 2.2654, "step": 4940 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.815428026602817e-05, "loss": 2.2838, "step": 4941 }, { "epoch": 0.66, "grad_norm": 0.30078125, "learning_rate": 4.815348199345363e-05, "loss": 2.2302, "step": 4942 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.8152683554909794e-05, "loss": 2.292, "step": 4943 }, { "epoch": 0.66, "grad_norm": 0.27734375, "learning_rate": 4.815188495040237e-05, "loss": 2.2685, "step": 4944 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.815108617993709e-05, "loss": 2.2457, "step": 4945 }, { "epoch": 0.66, "grad_norm": 0.310546875, "learning_rate": 4.815028724351968e-05, "loss": 2.2733, "step": 4946 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.814948814115585e-05, "loss": 2.2781, "step": 4947 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.8148688872851354e-05, "loss": 2.2598, "step": 4948 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.814788943861191e-05, "loss": 2.264, "step": 4949 }, { "epoch": 0.66, "grad_norm": 0.291015625, "learning_rate": 4.8147089838443256e-05, "loss": 2.2572, "step": 4950 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.814629007235111e-05, "loss": 2.2563, "step": 4951 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.814549014034121e-05, "loss": 2.2834, "step": 4952 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.8144690042419294e-05, "loss": 2.2267, "step": 4953 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.8143889778591094e-05, "loss": 2.2739, "step": 4954 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.8143089348862344e-05, "loss": 2.2669, "step": 4955 }, { "epoch": 0.66, "grad_norm": 0.28125, "learning_rate": 4.8142288753238796e-05, "loss": 2.2672, "step": 4956 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.814148799172616e-05, "loss": 2.3009, "step": 4957 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.814068706433022e-05, "loss": 2.2558, "step": 4958 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.8139885971056665e-05, "loss": 2.2731, "step": 4959 }, { "epoch": 0.66, "grad_norm": 0.28515625, "learning_rate": 4.8139084711911274e-05, "loss": 2.2387, "step": 4960 }, { "epoch": 0.66, "grad_norm": 0.30859375, "learning_rate": 4.813828328689978e-05, "loss": 2.2475, "step": 4961 }, { "epoch": 0.66, "grad_norm": 0.283203125, "learning_rate": 4.813748169602793e-05, "loss": 2.2899, "step": 4962 }, { "epoch": 0.66, "grad_norm": 0.27734375, "learning_rate": 4.8136679939301465e-05, "loss": 2.2577, "step": 4963 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.813587801672614e-05, "loss": 2.2727, "step": 4964 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.8135075928307694e-05, "loss": 2.268, "step": 4965 }, { "epoch": 0.66, "grad_norm": 0.28515625, "learning_rate": 4.8134273674051876e-05, "loss": 2.2747, "step": 4966 }, { "epoch": 0.66, "grad_norm": 0.279296875, "learning_rate": 4.813347125396445e-05, "loss": 2.288, "step": 4967 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.813266866805116e-05, "loss": 2.271, "step": 4968 }, { "epoch": 0.66, "grad_norm": 0.28125, "learning_rate": 4.813186591631775e-05, "loss": 2.2957, "step": 4969 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.8131062998769996e-05, "loss": 2.2671, "step": 4970 }, { "epoch": 0.66, "grad_norm": 0.302734375, "learning_rate": 4.8130259915413636e-05, "loss": 2.2662, "step": 4971 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.8129456666254426e-05, "loss": 2.2676, "step": 4972 }, { "epoch": 0.66, "grad_norm": 0.298828125, "learning_rate": 4.8128653251298125e-05, "loss": 2.2782, "step": 4973 }, { "epoch": 0.66, "grad_norm": 0.3046875, "learning_rate": 4.8127849670550506e-05, "loss": 2.2464, "step": 4974 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.8127045924017324e-05, "loss": 2.2661, "step": 4975 }, { "epoch": 0.66, "grad_norm": 0.310546875, "learning_rate": 4.812624201170432e-05, "loss": 2.2802, "step": 4976 }, { "epoch": 0.66, "grad_norm": 0.29296875, "learning_rate": 4.8125437933617285e-05, "loss": 2.2537, "step": 4977 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.812463368976197e-05, "loss": 2.2699, "step": 4978 }, { "epoch": 0.66, "grad_norm": 0.287109375, "learning_rate": 4.8123829280144134e-05, "loss": 2.2575, "step": 4979 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.812302470476955e-05, "loss": 2.2899, "step": 4980 }, { "epoch": 0.66, "grad_norm": 0.306640625, "learning_rate": 4.812221996364399e-05, "loss": 2.2862, "step": 4981 }, { "epoch": 0.66, "grad_norm": 0.296875, "learning_rate": 4.8121415056773214e-05, "loss": 2.2605, "step": 4982 }, { "epoch": 0.66, "grad_norm": 0.2890625, "learning_rate": 4.812060998416299e-05, "loss": 2.2694, "step": 4983 }, { "epoch": 0.66, "grad_norm": 0.283203125, "learning_rate": 4.81198047458191e-05, "loss": 2.2461, "step": 4984 }, { "epoch": 0.66, "grad_norm": 0.294921875, "learning_rate": 4.811899934174731e-05, "loss": 2.256, "step": 4985 }, { "epoch": 0.67, "grad_norm": 0.298828125, "learning_rate": 4.8118193771953394e-05, "loss": 2.2676, "step": 4986 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.811738803644313e-05, "loss": 2.2667, "step": 4987 }, { "epoch": 0.67, "grad_norm": 0.3125, "learning_rate": 4.811658213522228e-05, "loss": 2.2796, "step": 4988 }, { "epoch": 0.67, "grad_norm": 0.3125, "learning_rate": 4.811577606829664e-05, "loss": 2.2876, "step": 4989 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.811496983567198e-05, "loss": 2.2622, "step": 4990 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.8114163437354064e-05, "loss": 2.2904, "step": 4991 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.8113356873348694e-05, "loss": 2.2848, "step": 4992 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.811255014366165e-05, "loss": 2.261, "step": 4993 }, { "epoch": 0.67, "grad_norm": 0.310546875, "learning_rate": 4.81117432482987e-05, "loss": 2.2706, "step": 4994 }, { "epoch": 0.67, "grad_norm": 0.279296875, "learning_rate": 4.811093618726564e-05, "loss": 2.2567, "step": 4995 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.8110128960568256e-05, "loss": 2.2433, "step": 4996 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.810932156821233e-05, "loss": 2.2855, "step": 4997 }, { "epoch": 0.67, "grad_norm": 0.296875, "learning_rate": 4.8108514010203644e-05, "loss": 2.2497, "step": 4998 }, { "epoch": 0.67, "grad_norm": 0.28515625, "learning_rate": 4.810770628654799e-05, "loss": 2.2845, "step": 4999 }, { "epoch": 0.67, "grad_norm": 0.26953125, "learning_rate": 4.8106898397251174e-05, "loss": 2.2573, "step": 5000 }, { "epoch": 0.67, "eval_loss": 2.2635653018951416, "eval_runtime": 607.897, "eval_samples_per_second": 63.779, "eval_steps_per_second": 7.973, "step": 5000 }, { "epoch": 0.67, "grad_norm": 0.296875, "learning_rate": 4.810609034231896e-05, "loss": 2.2684, "step": 5001 }, { "epoch": 0.67, "grad_norm": 0.279296875, "learning_rate": 4.810528212175717e-05, "loss": 2.2415, "step": 5002 }, { "epoch": 0.67, "grad_norm": 0.28125, "learning_rate": 4.810447373557156e-05, "loss": 2.2613, "step": 5003 }, { "epoch": 0.67, "grad_norm": 0.3125, "learning_rate": 4.810366518376797e-05, "loss": 2.2803, "step": 5004 }, { "epoch": 0.67, "grad_norm": 0.294921875, "learning_rate": 4.810285646635215e-05, "loss": 2.2761, "step": 5005 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.810204758332994e-05, "loss": 2.2526, "step": 5006 }, { "epoch": 0.67, "grad_norm": 0.314453125, "learning_rate": 4.81012385347071e-05, "loss": 2.285, "step": 5007 }, { "epoch": 0.67, "grad_norm": 0.294921875, "learning_rate": 4.810042932048946e-05, "loss": 2.2395, "step": 5008 }, { "epoch": 0.67, "grad_norm": 0.3125, "learning_rate": 4.80996199406828e-05, "loss": 2.2686, "step": 5009 }, { "epoch": 0.67, "grad_norm": 0.30859375, "learning_rate": 4.8098810395292924e-05, "loss": 2.2715, "step": 5010 }, { "epoch": 0.67, "grad_norm": 0.30859375, "learning_rate": 4.809800068432565e-05, "loss": 2.2368, "step": 5011 }, { "epoch": 0.67, "grad_norm": 0.302734375, "learning_rate": 4.809719080778678e-05, "loss": 2.2618, "step": 5012 }, { "epoch": 0.67, "grad_norm": 0.3046875, "learning_rate": 4.8096380765682095e-05, "loss": 2.2704, "step": 5013 }, { "epoch": 0.67, "grad_norm": 0.28515625, "learning_rate": 4.809557055801743e-05, "loss": 2.2802, "step": 5014 }, { "epoch": 0.67, "grad_norm": 0.296875, "learning_rate": 4.809476018479857e-05, "loss": 2.3006, "step": 5015 }, { "epoch": 0.67, "grad_norm": 0.302734375, "learning_rate": 4.809394964603134e-05, "loss": 2.2941, "step": 5016 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.8093138941721554e-05, "loss": 2.2898, "step": 5017 }, { "epoch": 0.67, "grad_norm": 0.298828125, "learning_rate": 4.809232807187501e-05, "loss": 2.2654, "step": 5018 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.809151703649752e-05, "loss": 2.2648, "step": 5019 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.809070583559491e-05, "loss": 2.2922, "step": 5020 }, { "epoch": 0.67, "grad_norm": 0.296875, "learning_rate": 4.808989446917298e-05, "loss": 2.282, "step": 5021 }, { "epoch": 0.67, "grad_norm": 0.30078125, "learning_rate": 4.808908293723756e-05, "loss": 2.275, "step": 5022 }, { "epoch": 0.67, "grad_norm": 0.322265625, "learning_rate": 4.808827123979446e-05, "loss": 2.2637, "step": 5023 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.8087459376849496e-05, "loss": 2.2502, "step": 5024 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.80866473484085e-05, "loss": 2.2668, "step": 5025 }, { "epoch": 0.67, "grad_norm": 0.302734375, "learning_rate": 4.808583515447727e-05, "loss": 2.2295, "step": 5026 }, { "epoch": 0.67, "grad_norm": 0.298828125, "learning_rate": 4.808502279506165e-05, "loss": 2.2589, "step": 5027 }, { "epoch": 0.67, "grad_norm": 0.28125, "learning_rate": 4.808421027016745e-05, "loss": 2.263, "step": 5028 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.80833975798005e-05, "loss": 2.2465, "step": 5029 }, { "epoch": 0.67, "grad_norm": 0.30859375, "learning_rate": 4.808258472396664e-05, "loss": 2.2761, "step": 5030 }, { "epoch": 0.67, "grad_norm": 0.279296875, "learning_rate": 4.8081771702671663e-05, "loss": 2.2585, "step": 5031 }, { "epoch": 0.67, "grad_norm": 0.294921875, "learning_rate": 4.808095851592143e-05, "loss": 2.2792, "step": 5032 }, { "epoch": 0.67, "grad_norm": 0.3125, "learning_rate": 4.808014516372175e-05, "loss": 2.2815, "step": 5033 }, { "epoch": 0.67, "grad_norm": 0.314453125, "learning_rate": 4.807933164607845e-05, "loss": 2.281, "step": 5034 }, { "epoch": 0.67, "grad_norm": 0.294921875, "learning_rate": 4.807851796299738e-05, "loss": 2.2602, "step": 5035 }, { "epoch": 0.67, "grad_norm": 0.306640625, "learning_rate": 4.8077704114484356e-05, "loss": 2.2991, "step": 5036 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.807689010054522e-05, "loss": 2.272, "step": 5037 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.807607592118581e-05, "loss": 2.2617, "step": 5038 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.807526157641196e-05, "loss": 2.281, "step": 5039 }, { "epoch": 0.67, "grad_norm": 0.296875, "learning_rate": 4.80744470662295e-05, "loss": 2.251, "step": 5040 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.8073632390644285e-05, "loss": 2.2828, "step": 5041 }, { "epoch": 0.67, "grad_norm": 0.28125, "learning_rate": 4.807281754966213e-05, "loss": 2.2878, "step": 5042 }, { "epoch": 0.67, "grad_norm": 0.29296875, "learning_rate": 4.80720025432889e-05, "loss": 2.2665, "step": 5043 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.807118737153042e-05, "loss": 2.2744, "step": 5044 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.807037203439255e-05, "loss": 2.2619, "step": 5045 }, { "epoch": 0.67, "grad_norm": 0.28125, "learning_rate": 4.8069556531881116e-05, "loss": 2.2695, "step": 5046 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.806874086400198e-05, "loss": 2.2869, "step": 5047 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.806792503076097e-05, "loss": 2.266, "step": 5048 }, { "epoch": 0.67, "grad_norm": 0.28125, "learning_rate": 4.806710903216395e-05, "loss": 2.2601, "step": 5049 }, { "epoch": 0.67, "grad_norm": 0.302734375, "learning_rate": 4.806629286821677e-05, "loss": 2.2976, "step": 5050 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.806547653892527e-05, "loss": 2.2857, "step": 5051 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.8064660044295315e-05, "loss": 2.3001, "step": 5052 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.806384338433274e-05, "loss": 2.286, "step": 5053 }, { "epoch": 0.67, "grad_norm": 0.291015625, "learning_rate": 4.8063026559043414e-05, "loss": 2.2892, "step": 5054 }, { "epoch": 0.67, "grad_norm": 0.298828125, "learning_rate": 4.806220956843318e-05, "loss": 2.2775, "step": 5055 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.806139241250791e-05, "loss": 2.3051, "step": 5056 }, { "epoch": 0.67, "grad_norm": 0.28515625, "learning_rate": 4.8060575091273455e-05, "loss": 2.2816, "step": 5057 }, { "epoch": 0.67, "grad_norm": 0.2890625, "learning_rate": 4.805975760473566e-05, "loss": 2.2402, "step": 5058 }, { "epoch": 0.67, "grad_norm": 0.294921875, "learning_rate": 4.8058939952900405e-05, "loss": 2.2594, "step": 5059 }, { "epoch": 0.67, "grad_norm": 0.287109375, "learning_rate": 4.805812213577354e-05, "loss": 2.2753, "step": 5060 }, { "epoch": 0.68, "grad_norm": 0.283203125, "learning_rate": 4.805730415336093e-05, "loss": 2.2511, "step": 5061 }, { "epoch": 0.68, "grad_norm": 0.28125, "learning_rate": 4.805648600566844e-05, "loss": 2.2803, "step": 5062 }, { "epoch": 0.68, "grad_norm": 0.28125, "learning_rate": 4.8055667692701936e-05, "loss": 2.2853, "step": 5063 }, { "epoch": 0.68, "grad_norm": 0.2890625, "learning_rate": 4.8054849214467265e-05, "loss": 2.2477, "step": 5064 }, { "epoch": 0.68, "grad_norm": 0.2890625, "learning_rate": 4.805403057097033e-05, "loss": 2.2752, "step": 5065 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.805321176221696e-05, "loss": 2.2619, "step": 5066 }, { "epoch": 0.68, "grad_norm": 0.322265625, "learning_rate": 4.805239278821306e-05, "loss": 2.2751, "step": 5067 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.8051573648964476e-05, "loss": 2.2897, "step": 5068 }, { "epoch": 0.68, "grad_norm": 0.29296875, "learning_rate": 4.80507543444771e-05, "loss": 2.2678, "step": 5069 }, { "epoch": 0.68, "grad_norm": 0.27734375, "learning_rate": 4.804993487475678e-05, "loss": 2.2273, "step": 5070 }, { "epoch": 0.68, "grad_norm": 0.30859375, "learning_rate": 4.804911523980941e-05, "loss": 2.2855, "step": 5071 }, { "epoch": 0.68, "grad_norm": 0.3125, "learning_rate": 4.804829543964084e-05, "loss": 2.2685, "step": 5072 }, { "epoch": 0.68, "grad_norm": 0.28125, "learning_rate": 4.804747547425699e-05, "loss": 2.2273, "step": 5073 }, { "epoch": 0.68, "grad_norm": 0.310546875, "learning_rate": 4.80466553436637e-05, "loss": 2.2117, "step": 5074 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.804583504786686e-05, "loss": 2.2932, "step": 5075 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.804501458687236e-05, "loss": 2.261, "step": 5076 }, { "epoch": 0.68, "grad_norm": 0.306640625, "learning_rate": 4.804419396068607e-05, "loss": 2.2742, "step": 5077 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.8043373169313864e-05, "loss": 2.2593, "step": 5078 }, { "epoch": 0.68, "grad_norm": 0.302734375, "learning_rate": 4.804255221276165e-05, "loss": 2.2737, "step": 5079 }, { "epoch": 0.68, "grad_norm": 0.31640625, "learning_rate": 4.804173109103529e-05, "loss": 2.2605, "step": 5080 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.804090980414069e-05, "loss": 2.3021, "step": 5081 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.804008835208372e-05, "loss": 2.2502, "step": 5082 }, { "epoch": 0.68, "grad_norm": 0.29296875, "learning_rate": 4.8039266734870274e-05, "loss": 2.2796, "step": 5083 }, { "epoch": 0.68, "grad_norm": 0.283203125, "learning_rate": 4.8038444952506246e-05, "loss": 2.2512, "step": 5084 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.803762300499752e-05, "loss": 2.266, "step": 5085 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.8036800892349997e-05, "loss": 2.284, "step": 5086 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.803597861456955e-05, "loss": 2.2921, "step": 5087 }, { "epoch": 0.68, "grad_norm": 0.29296875, "learning_rate": 4.8035156171662106e-05, "loss": 2.2656, "step": 5088 }, { "epoch": 0.68, "grad_norm": 0.298828125, "learning_rate": 4.803433356363354e-05, "loss": 2.2424, "step": 5089 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.8033510790489745e-05, "loss": 2.2531, "step": 5090 }, { "epoch": 0.68, "grad_norm": 0.318359375, "learning_rate": 4.803268785223662e-05, "loss": 2.2455, "step": 5091 }, { "epoch": 0.68, "grad_norm": 0.298828125, "learning_rate": 4.8031864748880075e-05, "loss": 2.2751, "step": 5092 }, { "epoch": 0.68, "grad_norm": 0.3125, "learning_rate": 4.8031041480425995e-05, "loss": 2.2755, "step": 5093 }, { "epoch": 0.68, "grad_norm": 0.283203125, "learning_rate": 4.80302180468803e-05, "loss": 2.2764, "step": 5094 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.8029394448248875e-05, "loss": 2.2027, "step": 5095 }, { "epoch": 0.68, "grad_norm": 0.2734375, "learning_rate": 4.8028570684537646e-05, "loss": 2.2696, "step": 5096 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.802774675575249e-05, "loss": 2.2673, "step": 5097 }, { "epoch": 0.68, "grad_norm": 0.31640625, "learning_rate": 4.802692266189933e-05, "loss": 2.247, "step": 5098 }, { "epoch": 0.68, "grad_norm": 0.310546875, "learning_rate": 4.802609840298407e-05, "loss": 2.29, "step": 5099 }, { "epoch": 0.68, "grad_norm": 0.294921875, "learning_rate": 4.8025273979012615e-05, "loss": 2.2573, "step": 5100 }, { "epoch": 0.68, "grad_norm": 0.29296875, "learning_rate": 4.802444938999088e-05, "loss": 2.2526, "step": 5101 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.802362463592477e-05, "loss": 2.265, "step": 5102 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.8022799716820206e-05, "loss": 2.2652, "step": 5103 }, { "epoch": 0.68, "grad_norm": 0.32421875, "learning_rate": 4.802197463268309e-05, "loss": 2.2356, "step": 5104 }, { "epoch": 0.68, "grad_norm": 0.30078125, "learning_rate": 4.802114938351935e-05, "loss": 2.2838, "step": 5105 }, { "epoch": 0.68, "grad_norm": 0.314453125, "learning_rate": 4.802032396933489e-05, "loss": 2.2668, "step": 5106 }, { "epoch": 0.68, "grad_norm": 0.28515625, "learning_rate": 4.801949839013563e-05, "loss": 2.2783, "step": 5107 }, { "epoch": 0.68, "grad_norm": 0.271484375, "learning_rate": 4.801867264592749e-05, "loss": 2.2687, "step": 5108 }, { "epoch": 0.68, "grad_norm": 0.310546875, "learning_rate": 4.8017846736716385e-05, "loss": 2.2645, "step": 5109 }, { "epoch": 0.68, "grad_norm": 0.29296875, "learning_rate": 4.801702066250824e-05, "loss": 2.2719, "step": 5110 }, { "epoch": 0.68, "grad_norm": 0.2890625, "learning_rate": 4.801619442330898e-05, "loss": 2.3008, "step": 5111 }, { "epoch": 0.68, "grad_norm": 0.3046875, "learning_rate": 4.8015368019124513e-05, "loss": 2.2662, "step": 5112 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.801454144996077e-05, "loss": 2.2754, "step": 5113 }, { "epoch": 0.68, "grad_norm": 0.2890625, "learning_rate": 4.801371471582369e-05, "loss": 2.2802, "step": 5114 }, { "epoch": 0.68, "grad_norm": 0.30859375, "learning_rate": 4.8012887816719176e-05, "loss": 2.2672, "step": 5115 }, { "epoch": 0.68, "grad_norm": 0.302734375, "learning_rate": 4.801206075265317e-05, "loss": 2.3025, "step": 5116 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.8011233523631596e-05, "loss": 2.2508, "step": 5117 }, { "epoch": 0.68, "grad_norm": 0.302734375, "learning_rate": 4.801040612966039e-05, "loss": 2.2658, "step": 5118 }, { "epoch": 0.68, "grad_norm": 0.296875, "learning_rate": 4.800957857074547e-05, "loss": 2.2474, "step": 5119 }, { "epoch": 0.68, "grad_norm": 0.283203125, "learning_rate": 4.8008750846892784e-05, "loss": 2.2573, "step": 5120 }, { "epoch": 0.68, "grad_norm": 0.294921875, "learning_rate": 4.800792295810825e-05, "loss": 2.2475, "step": 5121 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.800709490439782e-05, "loss": 2.2772, "step": 5122 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.800626668576741e-05, "loss": 2.2592, "step": 5123 }, { "epoch": 0.68, "grad_norm": 0.275390625, "learning_rate": 4.800543830222297e-05, "loss": 2.2581, "step": 5124 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.8004609753770434e-05, "loss": 2.2752, "step": 5125 }, { "epoch": 0.68, "grad_norm": 0.287109375, "learning_rate": 4.800378104041574e-05, "loss": 2.2494, "step": 5126 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.8002952162164834e-05, "loss": 2.2676, "step": 5127 }, { "epoch": 0.68, "grad_norm": 0.294921875, "learning_rate": 4.800212311902364e-05, "loss": 2.2723, "step": 5128 }, { "epoch": 0.68, "grad_norm": 0.30078125, "learning_rate": 4.8001293910998136e-05, "loss": 2.2538, "step": 5129 }, { "epoch": 0.68, "grad_norm": 0.306640625, "learning_rate": 4.800046453809423e-05, "loss": 2.2369, "step": 5130 }, { "epoch": 0.68, "grad_norm": 0.3046875, "learning_rate": 4.799963500031789e-05, "loss": 2.2345, "step": 5131 }, { "epoch": 0.68, "grad_norm": 0.3046875, "learning_rate": 4.7998805297675044e-05, "loss": 2.2526, "step": 5132 }, { "epoch": 0.68, "grad_norm": 0.291015625, "learning_rate": 4.7997975430171654e-05, "loss": 2.2459, "step": 5133 }, { "epoch": 0.68, "grad_norm": 0.294921875, "learning_rate": 4.7997145397813663e-05, "loss": 2.2665, "step": 5134 }, { "epoch": 0.68, "grad_norm": 0.3046875, "learning_rate": 4.799631520060702e-05, "loss": 2.2979, "step": 5135 }, { "epoch": 0.69, "grad_norm": 0.291015625, "learning_rate": 4.799548483855768e-05, "loss": 2.2554, "step": 5136 }, { "epoch": 0.69, "grad_norm": 0.296875, "learning_rate": 4.799465431167159e-05, "loss": 2.2861, "step": 5137 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 4.7993823619954714e-05, "loss": 2.2828, "step": 5138 }, { "epoch": 0.69, "grad_norm": 0.28125, "learning_rate": 4.799299276341299e-05, "loss": 2.2976, "step": 5139 }, { "epoch": 0.69, "grad_norm": 0.291015625, "learning_rate": 4.7992161742052386e-05, "loss": 2.2792, "step": 5140 }, { "epoch": 0.69, "grad_norm": 0.32421875, "learning_rate": 4.799133055587886e-05, "loss": 2.277, "step": 5141 }, { "epoch": 0.69, "grad_norm": 0.287109375, "learning_rate": 4.7990499204898366e-05, "loss": 2.2859, "step": 5142 }, { "epoch": 0.69, "grad_norm": 0.30078125, "learning_rate": 4.7989667689116856e-05, "loss": 2.2734, "step": 5143 }, { "epoch": 0.69, "grad_norm": 0.30078125, "learning_rate": 4.7988836008540297e-05, "loss": 2.2515, "step": 5144 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.798800416317466e-05, "loss": 2.2371, "step": 5145 }, { "epoch": 0.69, "grad_norm": 0.279296875, "learning_rate": 4.79871721530259e-05, "loss": 2.2787, "step": 5146 }, { "epoch": 0.69, "grad_norm": 0.30078125, "learning_rate": 4.798633997809997e-05, "loss": 2.2694, "step": 5147 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.7985507638402846e-05, "loss": 2.2919, "step": 5148 }, { "epoch": 0.69, "grad_norm": 0.28125, "learning_rate": 4.7984675133940507e-05, "loss": 2.2587, "step": 5149 }, { "epoch": 0.69, "grad_norm": 0.3125, "learning_rate": 4.7983842464718895e-05, "loss": 2.2683, "step": 5150 }, { "epoch": 0.69, "grad_norm": 0.30859375, "learning_rate": 4.7983009630743995e-05, "loss": 2.2519, "step": 5151 }, { "epoch": 0.69, "grad_norm": 0.302734375, "learning_rate": 4.798217663202177e-05, "loss": 2.2868, "step": 5152 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.7981343468558194e-05, "loss": 2.2608, "step": 5153 }, { "epoch": 0.69, "grad_norm": 0.302734375, "learning_rate": 4.798051014035924e-05, "loss": 2.2645, "step": 5154 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 4.7979676647430885e-05, "loss": 2.2699, "step": 5155 }, { "epoch": 0.69, "grad_norm": 0.291015625, "learning_rate": 4.797884298977909e-05, "loss": 2.247, "step": 5156 }, { "epoch": 0.69, "grad_norm": 0.283203125, "learning_rate": 4.797800916740985e-05, "loss": 2.2386, "step": 5157 }, { "epoch": 0.69, "grad_norm": 0.310546875, "learning_rate": 4.797717518032914e-05, "loss": 2.2898, "step": 5158 }, { "epoch": 0.69, "grad_norm": 0.287109375, "learning_rate": 4.797634102854291e-05, "loss": 2.2466, "step": 5159 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.7975506712057175e-05, "loss": 2.2528, "step": 5160 }, { "epoch": 0.69, "grad_norm": 0.2890625, "learning_rate": 4.7974672230877895e-05, "loss": 2.2791, "step": 5161 }, { "epoch": 0.69, "grad_norm": 0.287109375, "learning_rate": 4.797383758501106e-05, "loss": 2.2734, "step": 5162 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.797300277446265e-05, "loss": 2.2904, "step": 5163 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 4.797216779923865e-05, "loss": 2.266, "step": 5164 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.797133265934504e-05, "loss": 2.2738, "step": 5165 }, { "epoch": 0.69, "grad_norm": 0.3125, "learning_rate": 4.7970497354787816e-05, "loss": 2.2644, "step": 5166 }, { "epoch": 0.69, "grad_norm": 0.31640625, "learning_rate": 4.7969661885572956e-05, "loss": 2.2669, "step": 5167 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.796882625170646e-05, "loss": 2.2528, "step": 5168 }, { "epoch": 0.69, "grad_norm": 0.28125, "learning_rate": 4.796799045319431e-05, "loss": 2.2824, "step": 5169 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.79671544900425e-05, "loss": 2.2543, "step": 5170 }, { "epoch": 0.69, "grad_norm": 0.279296875, "learning_rate": 4.796631836225701e-05, "loss": 2.2617, "step": 5171 }, { "epoch": 0.69, "grad_norm": 0.302734375, "learning_rate": 4.796548206984386e-05, "loss": 2.2396, "step": 5172 }, { "epoch": 0.69, "grad_norm": 0.283203125, "learning_rate": 4.796464561280902e-05, "loss": 2.2797, "step": 5173 }, { "epoch": 0.69, "grad_norm": 0.291015625, "learning_rate": 4.79638089911585e-05, "loss": 2.2509, "step": 5174 }, { "epoch": 0.69, "grad_norm": 0.296875, "learning_rate": 4.7962972204898284e-05, "loss": 2.2532, "step": 5175 }, { "epoch": 0.69, "grad_norm": 0.296875, "learning_rate": 4.796213525403439e-05, "loss": 2.2699, "step": 5176 }, { "epoch": 0.69, "grad_norm": 0.2890625, "learning_rate": 4.79612981385728e-05, "loss": 2.2809, "step": 5177 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.796046085851952e-05, "loss": 2.2784, "step": 5178 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.795962341388056e-05, "loss": 2.276, "step": 5179 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.795878580466191e-05, "loss": 2.2495, "step": 5180 }, { "epoch": 0.69, "grad_norm": 0.3125, "learning_rate": 4.795794803086958e-05, "loss": 2.2652, "step": 5181 }, { "epoch": 0.69, "grad_norm": 0.283203125, "learning_rate": 4.795711009250958e-05, "loss": 2.2423, "step": 5182 }, { "epoch": 0.69, "grad_norm": 0.3046875, "learning_rate": 4.79562719895879e-05, "loss": 2.218, "step": 5183 }, { "epoch": 0.69, "grad_norm": 0.30859375, "learning_rate": 4.795543372211057e-05, "loss": 2.2407, "step": 5184 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.795459529008358e-05, "loss": 2.2896, "step": 5185 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.7953756693512957e-05, "loss": 2.2648, "step": 5186 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 4.79529179324047e-05, "loss": 2.2591, "step": 5187 }, { "epoch": 0.69, "grad_norm": 0.2890625, "learning_rate": 4.7952079006764826e-05, "loss": 2.2256, "step": 5188 }, { "epoch": 0.69, "grad_norm": 0.3203125, "learning_rate": 4.7951239916599346e-05, "loss": 2.279, "step": 5189 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.795040066191428e-05, "loss": 2.2537, "step": 5190 }, { "epoch": 0.69, "grad_norm": 0.28125, "learning_rate": 4.794956124271564e-05, "loss": 2.2587, "step": 5191 }, { "epoch": 0.69, "grad_norm": 0.287109375, "learning_rate": 4.7948721659009436e-05, "loss": 2.2756, "step": 5192 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.79478819108017e-05, "loss": 2.2639, "step": 5193 }, { "epoch": 0.69, "grad_norm": 0.30078125, "learning_rate": 4.794704199809844e-05, "loss": 2.2701, "step": 5194 }, { "epoch": 0.69, "grad_norm": 0.3046875, "learning_rate": 4.794620192090569e-05, "loss": 2.2697, "step": 5195 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.794536167922946e-05, "loss": 2.2837, "step": 5196 }, { "epoch": 0.69, "grad_norm": 0.296875, "learning_rate": 4.794452127307578e-05, "loss": 2.2689, "step": 5197 }, { "epoch": 0.69, "grad_norm": 0.302734375, "learning_rate": 4.794368070245067e-05, "loss": 2.2674, "step": 5198 }, { "epoch": 0.69, "grad_norm": 0.2890625, "learning_rate": 4.794283996736015e-05, "loss": 2.2319, "step": 5199 }, { "epoch": 0.69, "grad_norm": 0.30078125, "learning_rate": 4.794199906781025e-05, "loss": 2.2559, "step": 5200 }, { "epoch": 0.69, "grad_norm": 0.306640625, "learning_rate": 4.794115800380702e-05, "loss": 2.2771, "step": 5201 }, { "epoch": 0.69, "grad_norm": 0.279296875, "learning_rate": 4.794031677535645e-05, "loss": 2.2688, "step": 5202 }, { "epoch": 0.69, "grad_norm": 0.294921875, "learning_rate": 4.7939475382464604e-05, "loss": 2.2722, "step": 5203 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.793863382513749e-05, "loss": 2.2636, "step": 5204 }, { "epoch": 0.69, "grad_norm": 0.28515625, "learning_rate": 4.793779210338115e-05, "loss": 2.2772, "step": 5205 }, { "epoch": 0.69, "grad_norm": 0.318359375, "learning_rate": 4.7936950217201625e-05, "loss": 2.2751, "step": 5206 }, { "epoch": 0.69, "grad_norm": 0.302734375, "learning_rate": 4.7936108166604935e-05, "loss": 2.2763, "step": 5207 }, { "epoch": 0.69, "grad_norm": 0.298828125, "learning_rate": 4.793526595159713e-05, "loss": 2.2669, "step": 5208 }, { "epoch": 0.69, "grad_norm": 0.29296875, "learning_rate": 4.7934423572184236e-05, "loss": 2.2891, "step": 5209 }, { "epoch": 0.69, "grad_norm": 0.3046875, "learning_rate": 4.7933581028372296e-05, "loss": 2.2572, "step": 5210 }, { "epoch": 0.7, "grad_norm": 0.296875, "learning_rate": 4.793273832016735e-05, "loss": 2.2536, "step": 5211 }, { "epoch": 0.7, "grad_norm": 0.310546875, "learning_rate": 4.7931895447575435e-05, "loss": 2.253, "step": 5212 }, { "epoch": 0.7, "grad_norm": 0.294921875, "learning_rate": 4.79310524106026e-05, "loss": 2.255, "step": 5213 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.793020920925488e-05, "loss": 2.2374, "step": 5214 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.792936584353832e-05, "loss": 2.2446, "step": 5215 }, { "epoch": 0.7, "grad_norm": 0.30078125, "learning_rate": 4.7928522313458976e-05, "loss": 2.2858, "step": 5216 }, { "epoch": 0.7, "grad_norm": 0.283203125, "learning_rate": 4.7927678619022884e-05, "loss": 2.2506, "step": 5217 }, { "epoch": 0.7, "grad_norm": 0.30078125, "learning_rate": 4.7926834760236095e-05, "loss": 2.2839, "step": 5218 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.792599073710467e-05, "loss": 2.2609, "step": 5219 }, { "epoch": 0.7, "grad_norm": 0.3125, "learning_rate": 4.792514654963462e-05, "loss": 2.2833, "step": 5220 }, { "epoch": 0.7, "grad_norm": 0.298828125, "learning_rate": 4.792430219783205e-05, "loss": 2.2899, "step": 5221 }, { "epoch": 0.7, "grad_norm": 0.287109375, "learning_rate": 4.792345768170298e-05, "loss": 2.2676, "step": 5222 }, { "epoch": 0.7, "grad_norm": 0.3046875, "learning_rate": 4.7922613001253456e-05, "loss": 2.2602, "step": 5223 }, { "epoch": 0.7, "grad_norm": 0.298828125, "learning_rate": 4.792176815648955e-05, "loss": 2.2762, "step": 5224 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.792092314741733e-05, "loss": 2.2504, "step": 5225 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.7920077974042826e-05, "loss": 2.2838, "step": 5226 }, { "epoch": 0.7, "grad_norm": 0.30078125, "learning_rate": 4.7919232636372114e-05, "loss": 2.2629, "step": 5227 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.7918387134411236e-05, "loss": 2.2644, "step": 5228 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.791754146816627e-05, "loss": 2.2679, "step": 5229 }, { "epoch": 0.7, "grad_norm": 0.30078125, "learning_rate": 4.791669563764328e-05, "loss": 2.2572, "step": 5230 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.791584964284832e-05, "loss": 2.2629, "step": 5231 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.791500348378745e-05, "loss": 2.269, "step": 5232 }, { "epoch": 0.7, "grad_norm": 0.291015625, "learning_rate": 4.7914157160466745e-05, "loss": 2.272, "step": 5233 }, { "epoch": 0.7, "grad_norm": 0.2890625, "learning_rate": 4.7913310672892265e-05, "loss": 2.2865, "step": 5234 }, { "epoch": 0.7, "grad_norm": 0.291015625, "learning_rate": 4.791246402107008e-05, "loss": 2.3123, "step": 5235 }, { "epoch": 0.7, "grad_norm": 0.302734375, "learning_rate": 4.7911617205006256e-05, "loss": 2.2463, "step": 5236 }, { "epoch": 0.7, "grad_norm": 0.30859375, "learning_rate": 4.791077022470688e-05, "loss": 2.2545, "step": 5237 }, { "epoch": 0.7, "grad_norm": 0.296875, "learning_rate": 4.7909923080178e-05, "loss": 2.2337, "step": 5238 }, { "epoch": 0.7, "grad_norm": 0.294921875, "learning_rate": 4.79090757714257e-05, "loss": 2.2609, "step": 5239 }, { "epoch": 0.7, "grad_norm": 0.296875, "learning_rate": 4.790822829845605e-05, "loss": 2.2599, "step": 5240 }, { "epoch": 0.7, "grad_norm": 0.302734375, "learning_rate": 4.790738066127513e-05, "loss": 2.2672, "step": 5241 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.790653285988902e-05, "loss": 2.2719, "step": 5242 }, { "epoch": 0.7, "grad_norm": 0.2890625, "learning_rate": 4.790568489430378e-05, "loss": 2.2823, "step": 5243 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.79048367645255e-05, "loss": 2.2839, "step": 5244 }, { "epoch": 0.7, "grad_norm": 0.296875, "learning_rate": 4.7903988470560265e-05, "loss": 2.2465, "step": 5245 }, { "epoch": 0.7, "grad_norm": 0.283203125, "learning_rate": 4.7903140012414144e-05, "loss": 2.2509, "step": 5246 }, { "epoch": 0.7, "grad_norm": 0.287109375, "learning_rate": 4.7902291390093224e-05, "loss": 2.25, "step": 5247 }, { "epoch": 0.7, "grad_norm": 0.30859375, "learning_rate": 4.7901442603603596e-05, "loss": 2.2676, "step": 5248 }, { "epoch": 0.7, "grad_norm": 0.294921875, "learning_rate": 4.7900593652951334e-05, "loss": 2.2568, "step": 5249 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.789974453814252e-05, "loss": 2.2313, "step": 5250 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.789889525918325e-05, "loss": 2.2696, "step": 5251 }, { "epoch": 0.7, "grad_norm": 0.3046875, "learning_rate": 4.789804581607961e-05, "loss": 2.2432, "step": 5252 }, { "epoch": 0.7, "grad_norm": 0.287109375, "learning_rate": 4.789719620883768e-05, "loss": 2.2706, "step": 5253 }, { "epoch": 0.7, "grad_norm": 0.294921875, "learning_rate": 4.7896346437463566e-05, "loss": 2.2605, "step": 5254 }, { "epoch": 0.7, "grad_norm": 0.296875, "learning_rate": 4.789549650196335e-05, "loss": 2.2525, "step": 5255 }, { "epoch": 0.7, "grad_norm": 0.2890625, "learning_rate": 4.789464640234312e-05, "loss": 2.266, "step": 5256 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.789379613860898e-05, "loss": 2.2661, "step": 5257 }, { "epoch": 0.7, "grad_norm": 0.2890625, "learning_rate": 4.789294571076702e-05, "loss": 2.2707, "step": 5258 }, { "epoch": 0.7, "grad_norm": 0.267578125, "learning_rate": 4.7892095118823335e-05, "loss": 2.2909, "step": 5259 }, { "epoch": 0.7, "grad_norm": 0.30859375, "learning_rate": 4.789124436278403e-05, "loss": 2.2676, "step": 5260 }, { "epoch": 0.7, "grad_norm": 0.30859375, "learning_rate": 4.789039344265519e-05, "loss": 2.2824, "step": 5261 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.788954235844292e-05, "loss": 2.2731, "step": 5262 }, { "epoch": 0.7, "grad_norm": 0.279296875, "learning_rate": 4.7888691110153324e-05, "loss": 2.2481, "step": 5263 }, { "epoch": 0.7, "grad_norm": 0.2890625, "learning_rate": 4.7887839697792504e-05, "loss": 2.2815, "step": 5264 }, { "epoch": 0.7, "grad_norm": 0.298828125, "learning_rate": 4.788698812136656e-05, "loss": 2.2372, "step": 5265 }, { "epoch": 0.7, "grad_norm": 0.306640625, "learning_rate": 4.78861363808816e-05, "loss": 2.2514, "step": 5266 }, { "epoch": 0.7, "grad_norm": 0.27734375, "learning_rate": 4.7885284476343726e-05, "loss": 2.2416, "step": 5267 }, { "epoch": 0.7, "grad_norm": 0.298828125, "learning_rate": 4.7884432407759045e-05, "loss": 2.2844, "step": 5268 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.7883580175133666e-05, "loss": 2.2815, "step": 5269 }, { "epoch": 0.7, "grad_norm": 0.26953125, "learning_rate": 4.7882727778473696e-05, "loss": 2.2719, "step": 5270 }, { "epoch": 0.7, "grad_norm": 0.29296875, "learning_rate": 4.788187521778525e-05, "loss": 2.2761, "step": 5271 }, { "epoch": 0.7, "grad_norm": 0.302734375, "learning_rate": 4.788102249307443e-05, "loss": 2.2388, "step": 5272 }, { "epoch": 0.7, "grad_norm": 0.30078125, "learning_rate": 4.788016960434736e-05, "loss": 2.2758, "step": 5273 }, { "epoch": 0.7, "grad_norm": 0.279296875, "learning_rate": 4.787931655161014e-05, "loss": 2.2605, "step": 5274 }, { "epoch": 0.7, "grad_norm": 0.310546875, "learning_rate": 4.7878463334868903e-05, "loss": 2.2784, "step": 5275 }, { "epoch": 0.7, "grad_norm": 0.27734375, "learning_rate": 4.7877609954129755e-05, "loss": 2.2675, "step": 5276 }, { "epoch": 0.7, "grad_norm": 0.291015625, "learning_rate": 4.78767564093988e-05, "loss": 2.2927, "step": 5277 }, { "epoch": 0.7, "grad_norm": 0.279296875, "learning_rate": 4.787590270068219e-05, "loss": 2.2623, "step": 5278 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.7875048827986005e-05, "loss": 2.2615, "step": 5279 }, { "epoch": 0.7, "grad_norm": 0.275390625, "learning_rate": 4.7874194791316394e-05, "loss": 2.2546, "step": 5280 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.787334059067947e-05, "loss": 2.2654, "step": 5281 }, { "epoch": 0.7, "grad_norm": 0.294921875, "learning_rate": 4.787248622608136e-05, "loss": 2.2797, "step": 5282 }, { "epoch": 0.7, "grad_norm": 0.28515625, "learning_rate": 4.787163169752818e-05, "loss": 2.2962, "step": 5283 }, { "epoch": 0.7, "grad_norm": 0.28125, "learning_rate": 4.7870777005026056e-05, "loss": 2.2886, "step": 5284 }, { "epoch": 0.7, "grad_norm": 0.3125, "learning_rate": 4.7869922148581126e-05, "loss": 2.2511, "step": 5285 }, { "epoch": 0.71, "grad_norm": 0.302734375, "learning_rate": 4.786906712819951e-05, "loss": 2.2632, "step": 5286 }, { "epoch": 0.71, "grad_norm": 0.298828125, "learning_rate": 4.786821194388733e-05, "loss": 2.2774, "step": 5287 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.786735659565073e-05, "loss": 2.2859, "step": 5288 }, { "epoch": 0.71, "grad_norm": 0.30859375, "learning_rate": 4.786650108349583e-05, "loss": 2.2543, "step": 5289 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.786564540742877e-05, "loss": 2.3164, "step": 5290 }, { "epoch": 0.71, "grad_norm": 0.27734375, "learning_rate": 4.786478956745568e-05, "loss": 2.239, "step": 5291 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.78639335635827e-05, "loss": 2.2656, "step": 5292 }, { "epoch": 0.71, "grad_norm": 0.287109375, "learning_rate": 4.786307739581595e-05, "loss": 2.3, "step": 5293 }, { "epoch": 0.71, "grad_norm": 0.30078125, "learning_rate": 4.786222106416159e-05, "loss": 2.2562, "step": 5294 }, { "epoch": 0.71, "grad_norm": 0.29296875, "learning_rate": 4.7861364568625744e-05, "loss": 2.2699, "step": 5295 }, { "epoch": 0.71, "grad_norm": 0.28125, "learning_rate": 4.786050790921455e-05, "loss": 2.2734, "step": 5296 }, { "epoch": 0.71, "grad_norm": 0.298828125, "learning_rate": 4.785965108593417e-05, "loss": 2.2441, "step": 5297 }, { "epoch": 0.71, "grad_norm": 0.283203125, "learning_rate": 4.785879409879071e-05, "loss": 2.2687, "step": 5298 }, { "epoch": 0.71, "grad_norm": 0.28125, "learning_rate": 4.7857936947790335e-05, "loss": 2.2468, "step": 5299 }, { "epoch": 0.71, "grad_norm": 0.279296875, "learning_rate": 4.785707963293919e-05, "loss": 2.2581, "step": 5300 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.7856222154243424e-05, "loss": 2.2571, "step": 5301 }, { "epoch": 0.71, "grad_norm": 0.3046875, "learning_rate": 4.7855364511709176e-05, "loss": 2.2626, "step": 5302 }, { "epoch": 0.71, "grad_norm": 0.302734375, "learning_rate": 4.785450670534259e-05, "loss": 2.2936, "step": 5303 }, { "epoch": 0.71, "grad_norm": 0.291015625, "learning_rate": 4.785364873514982e-05, "loss": 2.248, "step": 5304 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.7852790601137016e-05, "loss": 2.2688, "step": 5305 }, { "epoch": 0.71, "grad_norm": 0.30859375, "learning_rate": 4.785193230331033e-05, "loss": 2.2767, "step": 5306 }, { "epoch": 0.71, "grad_norm": 0.287109375, "learning_rate": 4.785107384167591e-05, "loss": 2.2454, "step": 5307 }, { "epoch": 0.71, "grad_norm": 0.291015625, "learning_rate": 4.7850215216239914e-05, "loss": 2.2712, "step": 5308 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.78493564270085e-05, "loss": 2.2652, "step": 5309 }, { "epoch": 0.71, "grad_norm": 0.287109375, "learning_rate": 4.784849747398782e-05, "loss": 2.2394, "step": 5310 }, { "epoch": 0.71, "grad_norm": 0.283203125, "learning_rate": 4.7847638357184026e-05, "loss": 2.2691, "step": 5311 }, { "epoch": 0.71, "grad_norm": 0.30859375, "learning_rate": 4.784677907660329e-05, "loss": 2.2477, "step": 5312 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.784591963225176e-05, "loss": 2.2364, "step": 5313 }, { "epoch": 0.71, "grad_norm": 0.29296875, "learning_rate": 4.784506002413559e-05, "loss": 2.2261, "step": 5314 }, { "epoch": 0.71, "grad_norm": 0.310546875, "learning_rate": 4.784420025226096e-05, "loss": 2.2754, "step": 5315 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.784334031663402e-05, "loss": 2.2917, "step": 5316 }, { "epoch": 0.71, "grad_norm": 0.28125, "learning_rate": 4.784248021726094e-05, "loss": 2.2728, "step": 5317 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.7841619954147885e-05, "loss": 2.2312, "step": 5318 }, { "epoch": 0.71, "grad_norm": 0.291015625, "learning_rate": 4.784075952730102e-05, "loss": 2.2566, "step": 5319 }, { "epoch": 0.71, "grad_norm": 0.30078125, "learning_rate": 4.783989893672651e-05, "loss": 2.2674, "step": 5320 }, { "epoch": 0.71, "grad_norm": 0.310546875, "learning_rate": 4.783903818243054e-05, "loss": 2.267, "step": 5321 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.783817726441926e-05, "loss": 2.2629, "step": 5322 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.783731618269884e-05, "loss": 2.2149, "step": 5323 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.7836454937275474e-05, "loss": 2.2822, "step": 5324 }, { "epoch": 0.71, "grad_norm": 0.33203125, "learning_rate": 4.783559352815532e-05, "loss": 2.2923, "step": 5325 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.783473195534455e-05, "loss": 2.277, "step": 5326 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.7833870218849354e-05, "loss": 2.2682, "step": 5327 }, { "epoch": 0.71, "grad_norm": 0.3046875, "learning_rate": 4.783300831867589e-05, "loss": 2.2958, "step": 5328 }, { "epoch": 0.71, "grad_norm": 0.298828125, "learning_rate": 4.783214625483035e-05, "loss": 2.2373, "step": 5329 }, { "epoch": 0.71, "grad_norm": 0.283203125, "learning_rate": 4.783128402731891e-05, "loss": 2.2864, "step": 5330 }, { "epoch": 0.71, "grad_norm": 0.310546875, "learning_rate": 4.783042163614776e-05, "loss": 2.2663, "step": 5331 }, { "epoch": 0.71, "grad_norm": 0.30859375, "learning_rate": 4.782955908132306e-05, "loss": 2.2699, "step": 5332 }, { "epoch": 0.71, "grad_norm": 0.287109375, "learning_rate": 4.782869636285101e-05, "loss": 2.2968, "step": 5333 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.782783348073779e-05, "loss": 2.2758, "step": 5334 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.782697043498959e-05, "loss": 2.2676, "step": 5335 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.7826107225612584e-05, "loss": 2.2557, "step": 5336 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.782524385261297e-05, "loss": 2.2582, "step": 5337 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.7824380315996945e-05, "loss": 2.2428, "step": 5338 }, { "epoch": 0.71, "grad_norm": 0.310546875, "learning_rate": 4.782351661577067e-05, "loss": 2.2633, "step": 5339 }, { "epoch": 0.71, "grad_norm": 0.287109375, "learning_rate": 4.782265275194037e-05, "loss": 2.2647, "step": 5340 }, { "epoch": 0.71, "grad_norm": 0.30078125, "learning_rate": 4.7821788724512204e-05, "loss": 2.2397, "step": 5341 }, { "epoch": 0.71, "grad_norm": 0.298828125, "learning_rate": 4.78209245334924e-05, "loss": 2.2431, "step": 5342 }, { "epoch": 0.71, "grad_norm": 0.30078125, "learning_rate": 4.782006017888713e-05, "loss": 2.2524, "step": 5343 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.7819195660702596e-05, "loss": 2.2663, "step": 5344 }, { "epoch": 0.71, "grad_norm": 0.283203125, "learning_rate": 4.781833097894499e-05, "loss": 2.2873, "step": 5345 }, { "epoch": 0.71, "grad_norm": 0.3046875, "learning_rate": 4.7817466133620526e-05, "loss": 2.2901, "step": 5346 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.781660112473538e-05, "loss": 2.2834, "step": 5347 }, { "epoch": 0.71, "grad_norm": 0.29296875, "learning_rate": 4.7815735952295784e-05, "loss": 2.2395, "step": 5348 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.781487061630791e-05, "loss": 2.253, "step": 5349 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.781400511677796e-05, "loss": 2.2726, "step": 5350 }, { "epoch": 0.71, "grad_norm": 0.291015625, "learning_rate": 4.7813139453712166e-05, "loss": 2.3076, "step": 5351 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.781227362711671e-05, "loss": 2.2865, "step": 5352 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.7811407636997806e-05, "loss": 2.28, "step": 5353 }, { "epoch": 0.71, "grad_norm": 0.28125, "learning_rate": 4.781054148336167e-05, "loss": 2.2689, "step": 5354 }, { "epoch": 0.71, "grad_norm": 0.296875, "learning_rate": 4.780967516621449e-05, "loss": 2.2525, "step": 5355 }, { "epoch": 0.71, "grad_norm": 0.279296875, "learning_rate": 4.78088086855625e-05, "loss": 2.2451, "step": 5356 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.780794204141188e-05, "loss": 2.2488, "step": 5357 }, { "epoch": 0.71, "grad_norm": 0.294921875, "learning_rate": 4.7807075233768886e-05, "loss": 2.2586, "step": 5358 }, { "epoch": 0.71, "grad_norm": 0.2890625, "learning_rate": 4.780620826263969e-05, "loss": 2.2813, "step": 5359 }, { "epoch": 0.71, "grad_norm": 0.28515625, "learning_rate": 4.780534112803052e-05, "loss": 2.2724, "step": 5360 }, { "epoch": 0.72, "grad_norm": 0.298828125, "learning_rate": 4.780447382994761e-05, "loss": 2.2472, "step": 5361 }, { "epoch": 0.72, "grad_norm": 0.2890625, "learning_rate": 4.780360636839716e-05, "loss": 2.288, "step": 5362 }, { "epoch": 0.72, "grad_norm": 0.3125, "learning_rate": 4.780273874338538e-05, "loss": 2.285, "step": 5363 }, { "epoch": 0.72, "grad_norm": 0.283203125, "learning_rate": 4.78018709549185e-05, "loss": 2.269, "step": 5364 }, { "epoch": 0.72, "grad_norm": 0.28515625, "learning_rate": 4.780100300300275e-05, "loss": 2.2754, "step": 5365 }, { "epoch": 0.72, "grad_norm": 0.287109375, "learning_rate": 4.7800134887644345e-05, "loss": 2.2564, "step": 5366 }, { "epoch": 0.72, "grad_norm": 0.29296875, "learning_rate": 4.7799266608849493e-05, "loss": 2.2615, "step": 5367 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.779839816662444e-05, "loss": 2.2764, "step": 5368 }, { "epoch": 0.72, "grad_norm": 0.28125, "learning_rate": 4.7797529560975396e-05, "loss": 2.2555, "step": 5369 }, { "epoch": 0.72, "grad_norm": 0.310546875, "learning_rate": 4.7796660791908585e-05, "loss": 2.2448, "step": 5370 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.779579185943025e-05, "loss": 2.299, "step": 5371 }, { "epoch": 0.72, "grad_norm": 0.2890625, "learning_rate": 4.7794922763546614e-05, "loss": 2.3053, "step": 5372 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.77940535042639e-05, "loss": 2.2625, "step": 5373 }, { "epoch": 0.72, "grad_norm": 0.31640625, "learning_rate": 4.7793184081588346e-05, "loss": 2.2438, "step": 5374 }, { "epoch": 0.72, "grad_norm": 0.28515625, "learning_rate": 4.779231449552618e-05, "loss": 2.287, "step": 5375 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.779144474608364e-05, "loss": 2.2889, "step": 5376 }, { "epoch": 0.72, "grad_norm": 0.306640625, "learning_rate": 4.779057483326695e-05, "loss": 2.2499, "step": 5377 }, { "epoch": 0.72, "grad_norm": 0.27734375, "learning_rate": 4.778970475708236e-05, "loss": 2.2803, "step": 5378 }, { "epoch": 0.72, "grad_norm": 0.287109375, "learning_rate": 4.7788834517536106e-05, "loss": 2.2894, "step": 5379 }, { "epoch": 0.72, "grad_norm": 0.283203125, "learning_rate": 4.7787964114634416e-05, "loss": 2.2363, "step": 5380 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.778709354838353e-05, "loss": 2.2846, "step": 5381 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.77862228187897e-05, "loss": 2.2809, "step": 5382 }, { "epoch": 0.72, "grad_norm": 0.2890625, "learning_rate": 4.7785351925859145e-05, "loss": 2.2871, "step": 5383 }, { "epoch": 0.72, "grad_norm": 0.3046875, "learning_rate": 4.778448086959814e-05, "loss": 2.2559, "step": 5384 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.778360965001289e-05, "loss": 2.2774, "step": 5385 }, { "epoch": 0.72, "grad_norm": 0.298828125, "learning_rate": 4.7782738267109675e-05, "loss": 2.2643, "step": 5386 }, { "epoch": 0.72, "grad_norm": 0.28125, "learning_rate": 4.778186672089473e-05, "loss": 2.2649, "step": 5387 }, { "epoch": 0.72, "grad_norm": 0.287109375, "learning_rate": 4.77809950113743e-05, "loss": 2.2556, "step": 5388 }, { "epoch": 0.72, "grad_norm": 0.2890625, "learning_rate": 4.7780123138554624e-05, "loss": 2.2597, "step": 5389 }, { "epoch": 0.72, "grad_norm": 0.298828125, "learning_rate": 4.777925110244197e-05, "loss": 2.2515, "step": 5390 }, { "epoch": 0.72, "grad_norm": 0.287109375, "learning_rate": 4.777837890304258e-05, "loss": 2.2703, "step": 5391 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.77775065403627e-05, "loss": 2.2784, "step": 5392 }, { "epoch": 0.72, "grad_norm": 0.30859375, "learning_rate": 4.777663401440859e-05, "loss": 2.2469, "step": 5393 }, { "epoch": 0.72, "grad_norm": 0.3125, "learning_rate": 4.777576132518651e-05, "loss": 2.3032, "step": 5394 }, { "epoch": 0.72, "grad_norm": 0.28515625, "learning_rate": 4.777488847270271e-05, "loss": 2.2785, "step": 5395 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.777401545696345e-05, "loss": 2.2544, "step": 5396 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.777314227797498e-05, "loss": 2.249, "step": 5397 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.7772268935743564e-05, "loss": 2.2905, "step": 5398 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.777139543027546e-05, "loss": 2.2605, "step": 5399 }, { "epoch": 0.72, "grad_norm": 0.298828125, "learning_rate": 4.7770521761576935e-05, "loss": 2.249, "step": 5400 }, { "epoch": 0.72, "grad_norm": 0.2890625, "learning_rate": 4.776964792965425e-05, "loss": 2.2409, "step": 5401 }, { "epoch": 0.72, "grad_norm": 0.30078125, "learning_rate": 4.776877393451366e-05, "loss": 2.2462, "step": 5402 }, { "epoch": 0.72, "grad_norm": 0.30859375, "learning_rate": 4.776789977616144e-05, "loss": 2.2391, "step": 5403 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.776702545460386e-05, "loss": 2.233, "step": 5404 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.7766150969847175e-05, "loss": 2.2721, "step": 5405 }, { "epoch": 0.72, "grad_norm": 0.35546875, "learning_rate": 4.7765276321897656e-05, "loss": 2.2846, "step": 5406 }, { "epoch": 0.72, "grad_norm": 0.306640625, "learning_rate": 4.776440151076158e-05, "loss": 2.2707, "step": 5407 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.776352653644521e-05, "loss": 2.2597, "step": 5408 }, { "epoch": 0.72, "grad_norm": 0.318359375, "learning_rate": 4.776265139895483e-05, "loss": 2.2613, "step": 5409 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.77617760982967e-05, "loss": 2.301, "step": 5410 }, { "epoch": 0.72, "grad_norm": 0.302734375, "learning_rate": 4.7760900634477095e-05, "loss": 2.2308, "step": 5411 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.77600250075023e-05, "loss": 2.2665, "step": 5412 }, { "epoch": 0.72, "grad_norm": 0.32421875, "learning_rate": 4.775914921737859e-05, "loss": 2.2536, "step": 5413 }, { "epoch": 0.72, "grad_norm": 0.29296875, "learning_rate": 4.775827326411223e-05, "loss": 2.2658, "step": 5414 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.775739714770951e-05, "loss": 2.2758, "step": 5415 }, { "epoch": 0.72, "grad_norm": 0.275390625, "learning_rate": 4.775652086817671e-05, "loss": 2.2677, "step": 5416 }, { "epoch": 0.72, "grad_norm": 0.302734375, "learning_rate": 4.775564442552011e-05, "loss": 2.2723, "step": 5417 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.775476781974599e-05, "loss": 2.2804, "step": 5418 }, { "epoch": 0.72, "grad_norm": 0.294921875, "learning_rate": 4.775389105086064e-05, "loss": 2.2465, "step": 5419 }, { "epoch": 0.72, "grad_norm": 0.279296875, "learning_rate": 4.775301411887034e-05, "loss": 2.2443, "step": 5420 }, { "epoch": 0.72, "grad_norm": 0.29296875, "learning_rate": 4.775213702378138e-05, "loss": 2.2425, "step": 5421 }, { "epoch": 0.72, "grad_norm": 0.30078125, "learning_rate": 4.7751259765600046e-05, "loss": 2.2684, "step": 5422 }, { "epoch": 0.72, "grad_norm": 0.30078125, "learning_rate": 4.775038234433261e-05, "loss": 2.2544, "step": 5423 }, { "epoch": 0.72, "grad_norm": 0.30078125, "learning_rate": 4.774950475998539e-05, "loss": 2.2747, "step": 5424 }, { "epoch": 0.72, "grad_norm": 0.29296875, "learning_rate": 4.774862701256466e-05, "loss": 2.235, "step": 5425 }, { "epoch": 0.72, "grad_norm": 0.291015625, "learning_rate": 4.774774910207671e-05, "loss": 2.251, "step": 5426 }, { "epoch": 0.72, "grad_norm": 0.310546875, "learning_rate": 4.774687102852784e-05, "loss": 2.3019, "step": 5427 }, { "epoch": 0.72, "grad_norm": 0.29296875, "learning_rate": 4.7745992791924345e-05, "loss": 2.2485, "step": 5428 }, { "epoch": 0.72, "grad_norm": 0.314453125, "learning_rate": 4.7745114392272514e-05, "loss": 2.2565, "step": 5429 }, { "epoch": 0.72, "grad_norm": 0.306640625, "learning_rate": 4.774423582957865e-05, "loss": 2.2525, "step": 5430 }, { "epoch": 0.72, "grad_norm": 0.296875, "learning_rate": 4.774335710384905e-05, "loss": 2.2721, "step": 5431 }, { "epoch": 0.72, "grad_norm": 0.28515625, "learning_rate": 4.774247821509e-05, "loss": 2.2562, "step": 5432 }, { "epoch": 0.72, "grad_norm": 0.314453125, "learning_rate": 4.7741599163307825e-05, "loss": 2.247, "step": 5433 }, { "epoch": 0.72, "grad_norm": 0.298828125, "learning_rate": 4.77407199485088e-05, "loss": 2.2638, "step": 5434 }, { "epoch": 0.72, "grad_norm": 0.302734375, "learning_rate": 4.7739840570699245e-05, "loss": 2.2761, "step": 5435 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.7738961029885455e-05, "loss": 2.2721, "step": 5436 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.773808132607374e-05, "loss": 2.2486, "step": 5437 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.773720145927041e-05, "loss": 2.2393, "step": 5438 }, { "epoch": 0.73, "grad_norm": 0.302734375, "learning_rate": 4.773632142948176e-05, "loss": 2.3155, "step": 5439 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.773544123671409e-05, "loss": 2.2329, "step": 5440 }, { "epoch": 0.73, "grad_norm": 0.30859375, "learning_rate": 4.7734560880973746e-05, "loss": 2.2342, "step": 5441 }, { "epoch": 0.73, "grad_norm": 0.2890625, "learning_rate": 4.7733680362267005e-05, "loss": 2.2586, "step": 5442 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.773279968060019e-05, "loss": 2.2549, "step": 5443 }, { "epoch": 0.73, "grad_norm": 0.28515625, "learning_rate": 4.773191883597961e-05, "loss": 2.2917, "step": 5444 }, { "epoch": 0.73, "grad_norm": 0.283203125, "learning_rate": 4.773103782841159e-05, "loss": 2.2539, "step": 5445 }, { "epoch": 0.73, "grad_norm": 0.306640625, "learning_rate": 4.7730156657902435e-05, "loss": 2.2743, "step": 5446 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.772927532445846e-05, "loss": 2.2976, "step": 5447 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.7728393828085995e-05, "loss": 2.2215, "step": 5448 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.7727512168791345e-05, "loss": 2.236, "step": 5449 }, { "epoch": 0.73, "grad_norm": 0.3046875, "learning_rate": 4.772663034658084e-05, "loss": 2.2855, "step": 5450 }, { "epoch": 0.73, "grad_norm": 0.287109375, "learning_rate": 4.772574836146079e-05, "loss": 2.2698, "step": 5451 }, { "epoch": 0.73, "grad_norm": 0.26953125, "learning_rate": 4.7724866213437525e-05, "loss": 2.2496, "step": 5452 }, { "epoch": 0.73, "grad_norm": 0.2890625, "learning_rate": 4.772398390251737e-05, "loss": 2.2681, "step": 5453 }, { "epoch": 0.73, "grad_norm": 0.3046875, "learning_rate": 4.772310142870665e-05, "loss": 2.2618, "step": 5454 }, { "epoch": 0.73, "grad_norm": 0.310546875, "learning_rate": 4.7722218792011684e-05, "loss": 2.2613, "step": 5455 }, { "epoch": 0.73, "grad_norm": 0.287109375, "learning_rate": 4.7721335992438806e-05, "loss": 2.2756, "step": 5456 }, { "epoch": 0.73, "grad_norm": 0.28515625, "learning_rate": 4.772045302999434e-05, "loss": 2.2219, "step": 5457 }, { "epoch": 0.73, "grad_norm": 0.30859375, "learning_rate": 4.771956990468462e-05, "loss": 2.2703, "step": 5458 }, { "epoch": 0.73, "grad_norm": 0.3125, "learning_rate": 4.771868661651596e-05, "loss": 2.2096, "step": 5459 }, { "epoch": 0.73, "grad_norm": 0.298828125, "learning_rate": 4.7717803165494714e-05, "loss": 2.2733, "step": 5460 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.77169195516272e-05, "loss": 2.2643, "step": 5461 }, { "epoch": 0.73, "grad_norm": 0.28125, "learning_rate": 4.771603577491976e-05, "loss": 2.2559, "step": 5462 }, { "epoch": 0.73, "grad_norm": 0.279296875, "learning_rate": 4.771515183537872e-05, "loss": 2.2481, "step": 5463 }, { "epoch": 0.73, "grad_norm": 0.296875, "learning_rate": 4.771426773301043e-05, "loss": 2.2676, "step": 5464 }, { "epoch": 0.73, "grad_norm": 0.291015625, "learning_rate": 4.771338346782122e-05, "loss": 2.2734, "step": 5465 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.7712499039817425e-05, "loss": 2.2522, "step": 5466 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.771161444900539e-05, "loss": 2.2658, "step": 5467 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.7710729695391454e-05, "loss": 2.2733, "step": 5468 }, { "epoch": 0.73, "grad_norm": 0.2890625, "learning_rate": 4.7709844778981957e-05, "loss": 2.2677, "step": 5469 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.7708959699783243e-05, "loss": 2.246, "step": 5470 }, { "epoch": 0.73, "grad_norm": 0.291015625, "learning_rate": 4.770807445780166e-05, "loss": 2.2726, "step": 5471 }, { "epoch": 0.73, "grad_norm": 0.34375, "learning_rate": 4.7707189053043554e-05, "loss": 2.233, "step": 5472 }, { "epoch": 0.73, "grad_norm": 0.283203125, "learning_rate": 4.770630348551527e-05, "loss": 2.2517, "step": 5473 }, { "epoch": 0.73, "grad_norm": 0.287109375, "learning_rate": 4.7705417755223147e-05, "loss": 2.2244, "step": 5474 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.7704531862173555e-05, "loss": 2.2611, "step": 5475 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.770364580637282e-05, "loss": 2.2733, "step": 5476 }, { "epoch": 0.73, "grad_norm": 0.310546875, "learning_rate": 4.770275958782731e-05, "loss": 2.302, "step": 5477 }, { "epoch": 0.73, "grad_norm": 0.298828125, "learning_rate": 4.770187320654337e-05, "loss": 2.2884, "step": 5478 }, { "epoch": 0.73, "grad_norm": 0.302734375, "learning_rate": 4.7700986662527356e-05, "loss": 2.2444, "step": 5479 }, { "epoch": 0.73, "grad_norm": 0.30859375, "learning_rate": 4.7700099955785626e-05, "loss": 2.2728, "step": 5480 }, { "epoch": 0.73, "grad_norm": 0.322265625, "learning_rate": 4.769921308632454e-05, "loss": 2.2777, "step": 5481 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.769832605415043e-05, "loss": 2.2441, "step": 5482 }, { "epoch": 0.73, "grad_norm": 0.2890625, "learning_rate": 4.7697438859269686e-05, "loss": 2.2632, "step": 5483 }, { "epoch": 0.73, "grad_norm": 0.318359375, "learning_rate": 4.7696551501688656e-05, "loss": 2.2936, "step": 5484 }, { "epoch": 0.73, "grad_norm": 0.306640625, "learning_rate": 4.76956639814137e-05, "loss": 2.2676, "step": 5485 }, { "epoch": 0.73, "grad_norm": 0.279296875, "learning_rate": 4.769477629845117e-05, "loss": 2.2823, "step": 5486 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.769388845280745e-05, "loss": 2.2723, "step": 5487 }, { "epoch": 0.73, "grad_norm": 0.310546875, "learning_rate": 4.7693000444488886e-05, "loss": 2.3049, "step": 5488 }, { "epoch": 0.73, "grad_norm": 0.291015625, "learning_rate": 4.769211227350185e-05, "loss": 2.2415, "step": 5489 }, { "epoch": 0.73, "grad_norm": 0.2890625, "learning_rate": 4.769122393985271e-05, "loss": 2.2609, "step": 5490 }, { "epoch": 0.73, "grad_norm": 0.302734375, "learning_rate": 4.769033544354784e-05, "loss": 2.269, "step": 5491 }, { "epoch": 0.73, "grad_norm": 0.298828125, "learning_rate": 4.768944678459359e-05, "loss": 2.276, "step": 5492 }, { "epoch": 0.73, "grad_norm": 0.296875, "learning_rate": 4.768855796299636e-05, "loss": 2.2839, "step": 5493 }, { "epoch": 0.73, "grad_norm": 0.30078125, "learning_rate": 4.768766897876248e-05, "loss": 2.2625, "step": 5494 }, { "epoch": 0.73, "grad_norm": 0.28515625, "learning_rate": 4.768677983189837e-05, "loss": 2.2792, "step": 5495 }, { "epoch": 0.73, "grad_norm": 0.32421875, "learning_rate": 4.768589052241036e-05, "loss": 2.2422, "step": 5496 }, { "epoch": 0.73, "grad_norm": 0.31640625, "learning_rate": 4.7685001050304844e-05, "loss": 2.2492, "step": 5497 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.768411141558821e-05, "loss": 2.2756, "step": 5498 }, { "epoch": 0.73, "grad_norm": 0.291015625, "learning_rate": 4.768322161826681e-05, "loss": 2.2798, "step": 5499 }, { "epoch": 0.73, "grad_norm": 0.302734375, "learning_rate": 4.768233165834704e-05, "loss": 2.2404, "step": 5500 }, { "epoch": 0.73, "grad_norm": 0.279296875, "learning_rate": 4.768144153583528e-05, "loss": 2.277, "step": 5501 }, { "epoch": 0.73, "grad_norm": 0.30859375, "learning_rate": 4.76805512507379e-05, "loss": 2.2663, "step": 5502 }, { "epoch": 0.73, "grad_norm": 0.296875, "learning_rate": 4.7679660803061286e-05, "loss": 2.2948, "step": 5503 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.7678770192811824e-05, "loss": 2.2429, "step": 5504 }, { "epoch": 0.73, "grad_norm": 0.302734375, "learning_rate": 4.7677879419995895e-05, "loss": 2.2888, "step": 5505 }, { "epoch": 0.73, "grad_norm": 0.322265625, "learning_rate": 4.767698848461989e-05, "loss": 2.249, "step": 5506 }, { "epoch": 0.73, "grad_norm": 0.28125, "learning_rate": 4.7676097386690186e-05, "loss": 2.2612, "step": 5507 }, { "epoch": 0.73, "grad_norm": 0.294921875, "learning_rate": 4.7675206126213164e-05, "loss": 2.2716, "step": 5508 }, { "epoch": 0.73, "grad_norm": 0.287109375, "learning_rate": 4.7674314703195244e-05, "loss": 2.2666, "step": 5509 }, { "epoch": 0.73, "grad_norm": 0.29296875, "learning_rate": 4.7673423117642786e-05, "loss": 2.2488, "step": 5510 }, { "epoch": 0.74, "grad_norm": 0.296875, "learning_rate": 4.767253136956219e-05, "loss": 2.2731, "step": 5511 }, { "epoch": 0.74, "grad_norm": 0.302734375, "learning_rate": 4.7671639458959855e-05, "loss": 2.2739, "step": 5512 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.767074738584216e-05, "loss": 2.223, "step": 5513 }, { "epoch": 0.74, "grad_norm": 0.2734375, "learning_rate": 4.766985515021551e-05, "loss": 2.2839, "step": 5514 }, { "epoch": 0.74, "grad_norm": 0.28515625, "learning_rate": 4.766896275208631e-05, "loss": 2.2629, "step": 5515 }, { "epoch": 0.74, "grad_norm": 0.29296875, "learning_rate": 4.766807019146093e-05, "loss": 2.264, "step": 5516 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.76671774683458e-05, "loss": 2.2793, "step": 5517 }, { "epoch": 0.74, "grad_norm": 0.27734375, "learning_rate": 4.7666284582747295e-05, "loss": 2.2764, "step": 5518 }, { "epoch": 0.74, "grad_norm": 0.31640625, "learning_rate": 4.766539153467182e-05, "loss": 2.2841, "step": 5519 }, { "epoch": 0.74, "grad_norm": 0.291015625, "learning_rate": 4.766449832412579e-05, "loss": 2.2763, "step": 5520 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.766360495111559e-05, "loss": 2.2785, "step": 5521 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.766271141564763e-05, "loss": 2.2596, "step": 5522 }, { "epoch": 0.74, "grad_norm": 0.30859375, "learning_rate": 4.7661817717728324e-05, "loss": 2.2733, "step": 5523 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.766092385736407e-05, "loss": 2.2629, "step": 5524 }, { "epoch": 0.74, "grad_norm": 0.28515625, "learning_rate": 4.766002983456127e-05, "loss": 2.2539, "step": 5525 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.7659135649326345e-05, "loss": 2.2498, "step": 5526 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.76582413016657e-05, "loss": 2.2643, "step": 5527 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.765734679158574e-05, "loss": 2.2872, "step": 5528 }, { "epoch": 0.74, "grad_norm": 0.28515625, "learning_rate": 4.765645211909289e-05, "loss": 2.2472, "step": 5529 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.765555728419354e-05, "loss": 2.2526, "step": 5530 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.765466228689413e-05, "loss": 2.2663, "step": 5531 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.765376712720106e-05, "loss": 2.2617, "step": 5532 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.765287180512076e-05, "loss": 2.2748, "step": 5533 }, { "epoch": 0.74, "grad_norm": 0.29296875, "learning_rate": 4.765197632065963e-05, "loss": 2.2804, "step": 5534 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.76510806738241e-05, "loss": 2.254, "step": 5535 }, { "epoch": 0.74, "grad_norm": 0.322265625, "learning_rate": 4.7650184864620586e-05, "loss": 2.2499, "step": 5536 }, { "epoch": 0.74, "grad_norm": 0.30078125, "learning_rate": 4.764928889305552e-05, "loss": 2.2558, "step": 5537 }, { "epoch": 0.74, "grad_norm": 0.306640625, "learning_rate": 4.764839275913531e-05, "loss": 2.295, "step": 5538 }, { "epoch": 0.74, "grad_norm": 0.28125, "learning_rate": 4.7647496462866395e-05, "loss": 2.2354, "step": 5539 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.764660000425518e-05, "loss": 2.2766, "step": 5540 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.76457033833081e-05, "loss": 2.2583, "step": 5541 }, { "epoch": 0.74, "grad_norm": 0.28515625, "learning_rate": 4.76448066000316e-05, "loss": 2.2422, "step": 5542 }, { "epoch": 0.74, "grad_norm": 0.298828125, "learning_rate": 4.764390965443208e-05, "loss": 2.2535, "step": 5543 }, { "epoch": 0.74, "grad_norm": 0.310546875, "learning_rate": 4.764301254651599e-05, "loss": 2.2524, "step": 5544 }, { "epoch": 0.74, "grad_norm": 0.30078125, "learning_rate": 4.7642115276289744e-05, "loss": 2.2901, "step": 5545 }, { "epoch": 0.74, "grad_norm": 0.291015625, "learning_rate": 4.764121784375979e-05, "loss": 2.2357, "step": 5546 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.764032024893255e-05, "loss": 2.2707, "step": 5547 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.763942249181446e-05, "loss": 2.2698, "step": 5548 }, { "epoch": 0.74, "grad_norm": 0.30078125, "learning_rate": 4.763852457241196e-05, "loss": 2.289, "step": 5549 }, { "epoch": 0.74, "grad_norm": 0.298828125, "learning_rate": 4.763762649073148e-05, "loss": 2.2358, "step": 5550 }, { "epoch": 0.74, "grad_norm": 0.306640625, "learning_rate": 4.763672824677946e-05, "loss": 2.2709, "step": 5551 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.763582984056235e-05, "loss": 2.2405, "step": 5552 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.763493127208657e-05, "loss": 2.2753, "step": 5553 }, { "epoch": 0.74, "grad_norm": 0.3125, "learning_rate": 4.7634032541358576e-05, "loss": 2.2446, "step": 5554 }, { "epoch": 0.74, "grad_norm": 0.3046875, "learning_rate": 4.7633133648384795e-05, "loss": 2.2581, "step": 5555 }, { "epoch": 0.74, "grad_norm": 0.296875, "learning_rate": 4.76322345931717e-05, "loss": 2.2516, "step": 5556 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.76313353757257e-05, "loss": 2.2517, "step": 5557 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.763043599605326e-05, "loss": 2.2567, "step": 5558 }, { "epoch": 0.74, "grad_norm": 0.30859375, "learning_rate": 4.7629536454160825e-05, "loss": 2.2466, "step": 5559 }, { "epoch": 0.74, "grad_norm": 0.3125, "learning_rate": 4.762863675005485e-05, "loss": 2.2711, "step": 5560 }, { "epoch": 0.74, "grad_norm": 0.29296875, "learning_rate": 4.762773688374177e-05, "loss": 2.2614, "step": 5561 }, { "epoch": 0.74, "grad_norm": 0.31640625, "learning_rate": 4.7626836855228047e-05, "loss": 2.2833, "step": 5562 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.762593666452012e-05, "loss": 2.2651, "step": 5563 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.762503631162446e-05, "loss": 2.232, "step": 5564 }, { "epoch": 0.74, "grad_norm": 0.296875, "learning_rate": 4.7624135796547506e-05, "loss": 2.2915, "step": 5565 }, { "epoch": 0.74, "grad_norm": 0.306640625, "learning_rate": 4.762323511929571e-05, "loss": 2.2718, "step": 5566 }, { "epoch": 0.74, "grad_norm": 0.29296875, "learning_rate": 4.762233427987555e-05, "loss": 2.2612, "step": 5567 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.7621433278293456e-05, "loss": 2.2627, "step": 5568 }, { "epoch": 0.74, "grad_norm": 0.3046875, "learning_rate": 4.7620532114555904e-05, "loss": 2.2957, "step": 5569 }, { "epoch": 0.74, "grad_norm": 0.298828125, "learning_rate": 4.7619630788669344e-05, "loss": 2.283, "step": 5570 }, { "epoch": 0.74, "grad_norm": 0.275390625, "learning_rate": 4.7618729300640255e-05, "loss": 2.2763, "step": 5571 }, { "epoch": 0.74, "grad_norm": 0.27734375, "learning_rate": 4.761782765047508e-05, "loss": 2.2603, "step": 5572 }, { "epoch": 0.74, "grad_norm": 0.30078125, "learning_rate": 4.7616925838180284e-05, "loss": 2.2551, "step": 5573 }, { "epoch": 0.74, "grad_norm": 0.306640625, "learning_rate": 4.761602386376234e-05, "loss": 2.2525, "step": 5574 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.761512172722771e-05, "loss": 2.2444, "step": 5575 }, { "epoch": 0.74, "grad_norm": 0.29296875, "learning_rate": 4.761421942858286e-05, "loss": 2.2726, "step": 5576 }, { "epoch": 0.74, "grad_norm": 0.294921875, "learning_rate": 4.7613316967834256e-05, "loss": 2.273, "step": 5577 }, { "epoch": 0.74, "grad_norm": 0.287109375, "learning_rate": 4.761241434498837e-05, "loss": 2.2987, "step": 5578 }, { "epoch": 0.74, "grad_norm": 0.283203125, "learning_rate": 4.761151156005167e-05, "loss": 2.2359, "step": 5579 }, { "epoch": 0.74, "grad_norm": 0.296875, "learning_rate": 4.761060861303064e-05, "loss": 2.2997, "step": 5580 }, { "epoch": 0.74, "grad_norm": 0.302734375, "learning_rate": 4.760970550393173e-05, "loss": 2.238, "step": 5581 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.760880223276143e-05, "loss": 2.2776, "step": 5582 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.7607898799526206e-05, "loss": 2.2649, "step": 5583 }, { "epoch": 0.74, "grad_norm": 0.2890625, "learning_rate": 4.760699520423254e-05, "loss": 2.2423, "step": 5584 }, { "epoch": 0.74, "grad_norm": 0.275390625, "learning_rate": 4.7606091446886906e-05, "loss": 2.2664, "step": 5585 }, { "epoch": 0.75, "grad_norm": 0.279296875, "learning_rate": 4.760518752749578e-05, "loss": 2.2588, "step": 5586 }, { "epoch": 0.75, "grad_norm": 0.279296875, "learning_rate": 4.760428344606565e-05, "loss": 2.2674, "step": 5587 }, { "epoch": 0.75, "grad_norm": 0.28125, "learning_rate": 4.760337920260299e-05, "loss": 2.2764, "step": 5588 }, { "epoch": 0.75, "grad_norm": 0.279296875, "learning_rate": 4.760247479711428e-05, "loss": 2.2568, "step": 5589 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 4.760157022960602e-05, "loss": 2.2774, "step": 5590 }, { "epoch": 0.75, "grad_norm": 0.28515625, "learning_rate": 4.7600665500084665e-05, "loss": 2.2553, "step": 5591 }, { "epoch": 0.75, "grad_norm": 0.283203125, "learning_rate": 4.759976060855672e-05, "loss": 2.2807, "step": 5592 }, { "epoch": 0.75, "grad_norm": 0.302734375, "learning_rate": 4.759885555502867e-05, "loss": 2.2436, "step": 5593 }, { "epoch": 0.75, "grad_norm": 0.30859375, "learning_rate": 4.7597950339507e-05, "loss": 2.2608, "step": 5594 }, { "epoch": 0.75, "grad_norm": 0.302734375, "learning_rate": 4.7597044961998194e-05, "loss": 2.2744, "step": 5595 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.759613942250875e-05, "loss": 2.2824, "step": 5596 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.759523372104515e-05, "loss": 2.2368, "step": 5597 }, { "epoch": 0.75, "grad_norm": 0.306640625, "learning_rate": 4.75943278576139e-05, "loss": 2.2712, "step": 5598 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.759342183222149e-05, "loss": 2.2733, "step": 5599 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 4.7592515644874396e-05, "loss": 2.269, "step": 5600 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 4.759160929557913e-05, "loss": 2.2928, "step": 5601 }, { "epoch": 0.75, "grad_norm": 0.298828125, "learning_rate": 4.759070278434219e-05, "loss": 2.2494, "step": 5602 }, { "epoch": 0.75, "grad_norm": 0.30078125, "learning_rate": 4.758979611117008e-05, "loss": 2.2763, "step": 5603 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.758888927606928e-05, "loss": 2.2722, "step": 5604 }, { "epoch": 0.75, "grad_norm": 0.30078125, "learning_rate": 4.75879822790463e-05, "loss": 2.2577, "step": 5605 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.758707512010764e-05, "loss": 2.2772, "step": 5606 }, { "epoch": 0.75, "grad_norm": 0.279296875, "learning_rate": 4.758616779925981e-05, "loss": 2.2413, "step": 5607 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.758526031650931e-05, "loss": 2.2632, "step": 5608 }, { "epoch": 0.75, "grad_norm": 0.302734375, "learning_rate": 4.7584352671862644e-05, "loss": 2.2913, "step": 5609 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.758344486532631e-05, "loss": 2.2572, "step": 5610 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.758253689690683e-05, "loss": 2.2985, "step": 5611 }, { "epoch": 0.75, "grad_norm": 0.32421875, "learning_rate": 4.7581628766610694e-05, "loss": 2.2436, "step": 5612 }, { "epoch": 0.75, "grad_norm": 0.28515625, "learning_rate": 4.758072047444444e-05, "loss": 2.299, "step": 5613 }, { "epoch": 0.75, "grad_norm": 0.3125, "learning_rate": 4.757981202041455e-05, "loss": 2.2858, "step": 5614 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.7578903404527554e-05, "loss": 2.2832, "step": 5615 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.7577994626789955e-05, "loss": 2.2709, "step": 5616 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.757708568720828e-05, "loss": 2.239, "step": 5617 }, { "epoch": 0.75, "grad_norm": 0.318359375, "learning_rate": 4.757617658578902e-05, "loss": 2.2781, "step": 5618 }, { "epoch": 0.75, "grad_norm": 0.326171875, "learning_rate": 4.757526732253872e-05, "loss": 2.2342, "step": 5619 }, { "epoch": 0.75, "grad_norm": 0.298828125, "learning_rate": 4.757435789746389e-05, "loss": 2.2548, "step": 5620 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 4.757344831057103e-05, "loss": 2.2447, "step": 5621 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 4.7572538561866684e-05, "loss": 2.2798, "step": 5622 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 4.757162865135737e-05, "loss": 2.2245, "step": 5623 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.7570718579049596e-05, "loss": 2.2524, "step": 5624 }, { "epoch": 0.75, "grad_norm": 0.30859375, "learning_rate": 4.7569808344949894e-05, "loss": 2.2809, "step": 5625 }, { "epoch": 0.75, "grad_norm": 0.302734375, "learning_rate": 4.756889794906479e-05, "loss": 2.2738, "step": 5626 }, { "epoch": 0.75, "grad_norm": 0.298828125, "learning_rate": 4.7567987391400806e-05, "loss": 2.2409, "step": 5627 }, { "epoch": 0.75, "grad_norm": 0.28515625, "learning_rate": 4.756707667196447e-05, "loss": 2.2743, "step": 5628 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 4.7566165790762315e-05, "loss": 2.2736, "step": 5629 }, { "epoch": 0.75, "grad_norm": 0.296875, "learning_rate": 4.756525474780087e-05, "loss": 2.2443, "step": 5630 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 4.7564343543086666e-05, "loss": 2.2886, "step": 5631 }, { "epoch": 0.75, "grad_norm": 0.298828125, "learning_rate": 4.7563432176626224e-05, "loss": 2.2323, "step": 5632 }, { "epoch": 0.75, "grad_norm": 0.287109375, "learning_rate": 4.756252064842609e-05, "loss": 2.2643, "step": 5633 }, { "epoch": 0.75, "grad_norm": 0.2734375, "learning_rate": 4.756160895849279e-05, "loss": 2.2509, "step": 5634 }, { "epoch": 0.75, "grad_norm": 0.267578125, "learning_rate": 4.756069710683286e-05, "loss": 2.2645, "step": 5635 }, { "epoch": 0.75, "grad_norm": 0.283203125, "learning_rate": 4.755978509345284e-05, "loss": 2.2767, "step": 5636 }, { "epoch": 0.75, "grad_norm": 0.30078125, "learning_rate": 4.755887291835927e-05, "loss": 2.2568, "step": 5637 }, { "epoch": 0.75, "grad_norm": 0.28515625, "learning_rate": 4.755796058155867e-05, "loss": 2.265, "step": 5638 }, { "epoch": 0.75, "grad_norm": 0.31640625, "learning_rate": 4.755704808305761e-05, "loss": 2.251, "step": 5639 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.7556135422862604e-05, "loss": 2.2428, "step": 5640 }, { "epoch": 0.75, "grad_norm": 0.306640625, "learning_rate": 4.755522260098021e-05, "loss": 2.2384, "step": 5641 }, { "epoch": 0.75, "grad_norm": 0.291015625, "learning_rate": 4.755430961741697e-05, "loss": 2.2896, "step": 5642 }, { "epoch": 0.75, "grad_norm": 0.287109375, "learning_rate": 4.755339647217942e-05, "loss": 2.2619, "step": 5643 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 4.755248316527411e-05, "loss": 2.2481, "step": 5644 }, { "epoch": 0.75, "grad_norm": 0.328125, "learning_rate": 4.755156969670759e-05, "loss": 2.2673, "step": 5645 }, { "epoch": 0.75, "grad_norm": 0.30078125, "learning_rate": 4.7550656066486406e-05, "loss": 2.2457, "step": 5646 }, { "epoch": 0.75, "grad_norm": 0.296875, "learning_rate": 4.7549742274617105e-05, "loss": 2.2636, "step": 5647 }, { "epoch": 0.75, "grad_norm": 0.298828125, "learning_rate": 4.754882832110624e-05, "loss": 2.2578, "step": 5648 }, { "epoch": 0.75, "grad_norm": 0.30859375, "learning_rate": 4.754791420596036e-05, "loss": 2.2378, "step": 5649 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.7546999929186024e-05, "loss": 2.2652, "step": 5650 }, { "epoch": 0.75, "grad_norm": 0.279296875, "learning_rate": 4.754608549078977e-05, "loss": 2.2572, "step": 5651 }, { "epoch": 0.75, "grad_norm": 0.2890625, "learning_rate": 4.7545170890778165e-05, "loss": 2.2817, "step": 5652 }, { "epoch": 0.75, "grad_norm": 0.296875, "learning_rate": 4.754425612915777e-05, "loss": 2.2395, "step": 5653 }, { "epoch": 0.75, "grad_norm": 0.322265625, "learning_rate": 4.754334120593513e-05, "loss": 2.2542, "step": 5654 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.754242612111681e-05, "loss": 2.2555, "step": 5655 }, { "epoch": 0.75, "grad_norm": 0.29296875, "learning_rate": 4.7541510874709376e-05, "loss": 2.2627, "step": 5656 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.754059546671937e-05, "loss": 2.2528, "step": 5657 }, { "epoch": 0.75, "grad_norm": 0.28515625, "learning_rate": 4.7539679897153375e-05, "loss": 2.2903, "step": 5658 }, { "epoch": 0.75, "grad_norm": 0.3046875, "learning_rate": 4.7538764166017944e-05, "loss": 2.2643, "step": 5659 }, { "epoch": 0.75, "grad_norm": 0.296875, "learning_rate": 4.753784827331964e-05, "loss": 2.273, "step": 5660 }, { "epoch": 0.76, "grad_norm": 0.27734375, "learning_rate": 4.7536932219065024e-05, "loss": 2.2777, "step": 5661 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.753601600326067e-05, "loss": 2.2443, "step": 5662 }, { "epoch": 0.76, "grad_norm": 0.30078125, "learning_rate": 4.753509962591315e-05, "loss": 2.2497, "step": 5663 }, { "epoch": 0.76, "grad_norm": 0.29296875, "learning_rate": 4.753418308702901e-05, "loss": 2.253, "step": 5664 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7533266386614855e-05, "loss": 2.2686, "step": 5665 }, { "epoch": 0.76, "grad_norm": 0.302734375, "learning_rate": 4.753234952467723e-05, "loss": 2.2373, "step": 5666 }, { "epoch": 0.76, "grad_norm": 0.27734375, "learning_rate": 4.753143250122272e-05, "loss": 2.2382, "step": 5667 }, { "epoch": 0.76, "grad_norm": 0.275390625, "learning_rate": 4.753051531625789e-05, "loss": 2.2732, "step": 5668 }, { "epoch": 0.76, "grad_norm": 0.28515625, "learning_rate": 4.752959796978931e-05, "loss": 2.2664, "step": 5669 }, { "epoch": 0.76, "grad_norm": 0.306640625, "learning_rate": 4.752868046182357e-05, "loss": 2.2804, "step": 5670 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7527762792367235e-05, "loss": 2.2694, "step": 5671 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7526844961426896e-05, "loss": 2.2587, "step": 5672 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.752592696900913e-05, "loss": 2.2616, "step": 5673 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.75250088151205e-05, "loss": 2.2749, "step": 5674 }, { "epoch": 0.76, "grad_norm": 0.271484375, "learning_rate": 4.7524090499767607e-05, "loss": 2.2303, "step": 5675 }, { "epoch": 0.76, "grad_norm": 0.30078125, "learning_rate": 4.752317202295702e-05, "loss": 2.2713, "step": 5676 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.7522253384695336e-05, "loss": 2.2649, "step": 5677 }, { "epoch": 0.76, "grad_norm": 0.28515625, "learning_rate": 4.752133458498913e-05, "loss": 2.2668, "step": 5678 }, { "epoch": 0.76, "grad_norm": 0.302734375, "learning_rate": 4.752041562384499e-05, "loss": 2.2501, "step": 5679 }, { "epoch": 0.76, "grad_norm": 0.283203125, "learning_rate": 4.751949650126951e-05, "loss": 2.2786, "step": 5680 }, { "epoch": 0.76, "grad_norm": 0.279296875, "learning_rate": 4.751857721726926e-05, "loss": 2.2737, "step": 5681 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.751765777185087e-05, "loss": 2.2388, "step": 5682 }, { "epoch": 0.76, "grad_norm": 0.302734375, "learning_rate": 4.751673816502088e-05, "loss": 2.2515, "step": 5683 }, { "epoch": 0.76, "grad_norm": 0.296875, "learning_rate": 4.751581839678591e-05, "loss": 2.2552, "step": 5684 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.7514898467152544e-05, "loss": 2.2831, "step": 5685 }, { "epoch": 0.76, "grad_norm": 0.28515625, "learning_rate": 4.751397837612739e-05, "loss": 2.2787, "step": 5686 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.751305812371704e-05, "loss": 2.2571, "step": 5687 }, { "epoch": 0.76, "grad_norm": 0.28515625, "learning_rate": 4.751213770992807e-05, "loss": 2.268, "step": 5688 }, { "epoch": 0.76, "grad_norm": 0.283203125, "learning_rate": 4.75112171347671e-05, "loss": 2.27, "step": 5689 }, { "epoch": 0.76, "grad_norm": 0.30859375, "learning_rate": 4.751029639824072e-05, "loss": 2.2606, "step": 5690 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.7509375500355536e-05, "loss": 2.24, "step": 5691 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.750845444111814e-05, "loss": 2.2856, "step": 5692 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.750753322053514e-05, "loss": 2.2701, "step": 5693 }, { "epoch": 0.76, "grad_norm": 0.322265625, "learning_rate": 4.7506611838613135e-05, "loss": 2.2709, "step": 5694 }, { "epoch": 0.76, "grad_norm": 0.28125, "learning_rate": 4.750569029535874e-05, "loss": 2.2844, "step": 5695 }, { "epoch": 0.76, "grad_norm": 0.30859375, "learning_rate": 4.750476859077855e-05, "loss": 2.2552, "step": 5696 }, { "epoch": 0.76, "grad_norm": 0.298828125, "learning_rate": 4.750384672487918e-05, "loss": 2.2605, "step": 5697 }, { "epoch": 0.76, "grad_norm": 0.27734375, "learning_rate": 4.7502924697667226e-05, "loss": 2.2516, "step": 5698 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.7502002509149315e-05, "loss": 2.2844, "step": 5699 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.7501080159332044e-05, "loss": 2.253, "step": 5700 }, { "epoch": 0.76, "grad_norm": 0.30078125, "learning_rate": 4.7500157648222035e-05, "loss": 2.2676, "step": 5701 }, { "epoch": 0.76, "grad_norm": 0.283203125, "learning_rate": 4.749923497582588e-05, "loss": 2.266, "step": 5702 }, { "epoch": 0.76, "grad_norm": 0.310546875, "learning_rate": 4.749831214215021e-05, "loss": 2.2836, "step": 5703 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.7497389147201655e-05, "loss": 2.2737, "step": 5704 }, { "epoch": 0.76, "grad_norm": 0.287109375, "learning_rate": 4.749646599098679e-05, "loss": 2.2915, "step": 5705 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.749554267351227e-05, "loss": 2.318, "step": 5706 }, { "epoch": 0.76, "grad_norm": 0.296875, "learning_rate": 4.74946191947847e-05, "loss": 2.2593, "step": 5707 }, { "epoch": 0.76, "grad_norm": 0.28125, "learning_rate": 4.74936955548107e-05, "loss": 2.2452, "step": 5708 }, { "epoch": 0.76, "grad_norm": 0.29296875, "learning_rate": 4.7492771753596886e-05, "loss": 2.2557, "step": 5709 }, { "epoch": 0.76, "grad_norm": 0.298828125, "learning_rate": 4.749184779114988e-05, "loss": 2.2185, "step": 5710 }, { "epoch": 0.76, "grad_norm": 0.30078125, "learning_rate": 4.7490923667476314e-05, "loss": 2.2665, "step": 5711 }, { "epoch": 0.76, "grad_norm": 0.27734375, "learning_rate": 4.7489999382582805e-05, "loss": 2.2469, "step": 5712 }, { "epoch": 0.76, "grad_norm": 0.287109375, "learning_rate": 4.748907493647599e-05, "loss": 2.2338, "step": 5713 }, { "epoch": 0.76, "grad_norm": 0.296875, "learning_rate": 4.748815032916248e-05, "loss": 2.2747, "step": 5714 }, { "epoch": 0.76, "grad_norm": 0.3125, "learning_rate": 4.748722556064891e-05, "loss": 2.271, "step": 5715 }, { "epoch": 0.76, "grad_norm": 0.28515625, "learning_rate": 4.7486300630941906e-05, "loss": 2.2613, "step": 5716 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7485375540048105e-05, "loss": 2.2356, "step": 5717 }, { "epoch": 0.76, "grad_norm": 0.306640625, "learning_rate": 4.748445028797413e-05, "loss": 2.2847, "step": 5718 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.7483524874726614e-05, "loss": 2.2515, "step": 5719 }, { "epoch": 0.76, "grad_norm": 0.298828125, "learning_rate": 4.74825993003122e-05, "loss": 2.2726, "step": 5720 }, { "epoch": 0.76, "grad_norm": 0.298828125, "learning_rate": 4.7481673564737516e-05, "loss": 2.2276, "step": 5721 }, { "epoch": 0.76, "grad_norm": 0.287109375, "learning_rate": 4.7480747668009196e-05, "loss": 2.2739, "step": 5722 }, { "epoch": 0.76, "grad_norm": 0.287109375, "learning_rate": 4.747982161013388e-05, "loss": 2.2502, "step": 5723 }, { "epoch": 0.76, "grad_norm": 0.291015625, "learning_rate": 4.747889539111821e-05, "loss": 2.2714, "step": 5724 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7477969010968816e-05, "loss": 2.239, "step": 5725 }, { "epoch": 0.76, "grad_norm": 0.2890625, "learning_rate": 4.747704246969235e-05, "loss": 2.2316, "step": 5726 }, { "epoch": 0.76, "grad_norm": 0.296875, "learning_rate": 4.747611576729544e-05, "loss": 2.2409, "step": 5727 }, { "epoch": 0.76, "grad_norm": 0.314453125, "learning_rate": 4.747518890378474e-05, "loss": 2.2882, "step": 5728 }, { "epoch": 0.76, "grad_norm": 0.30859375, "learning_rate": 4.7474261879166884e-05, "loss": 2.2682, "step": 5729 }, { "epoch": 0.76, "grad_norm": 0.294921875, "learning_rate": 4.7473334693448525e-05, "loss": 2.257, "step": 5730 }, { "epoch": 0.76, "grad_norm": 0.3125, "learning_rate": 4.747240734663631e-05, "loss": 2.2471, "step": 5731 }, { "epoch": 0.76, "grad_norm": 0.298828125, "learning_rate": 4.747147983873689e-05, "loss": 2.2583, "step": 5732 }, { "epoch": 0.76, "grad_norm": 0.31640625, "learning_rate": 4.7470552169756896e-05, "loss": 2.2967, "step": 5733 }, { "epoch": 0.76, "grad_norm": 0.287109375, "learning_rate": 4.7469624339702986e-05, "loss": 2.3051, "step": 5734 }, { "epoch": 0.77, "grad_norm": 0.302734375, "learning_rate": 4.7468696348581824e-05, "loss": 2.278, "step": 5735 }, { "epoch": 0.77, "grad_norm": 0.31640625, "learning_rate": 4.746776819640005e-05, "loss": 2.2716, "step": 5736 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.746683988316432e-05, "loss": 2.2598, "step": 5737 }, { "epoch": 0.77, "grad_norm": 0.318359375, "learning_rate": 4.746591140888129e-05, "loss": 2.2588, "step": 5738 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.746498277355761e-05, "loss": 2.2681, "step": 5739 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.746405397719993e-05, "loss": 2.2593, "step": 5740 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.746312501981493e-05, "loss": 2.2574, "step": 5741 }, { "epoch": 0.77, "grad_norm": 0.27734375, "learning_rate": 4.746219590140925e-05, "loss": 2.3112, "step": 5742 }, { "epoch": 0.77, "grad_norm": 0.29296875, "learning_rate": 4.746126662198957e-05, "loss": 2.2605, "step": 5743 }, { "epoch": 0.77, "grad_norm": 0.287109375, "learning_rate": 4.746033718156252e-05, "loss": 2.2336, "step": 5744 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.7459407580134784e-05, "loss": 2.3083, "step": 5745 }, { "epoch": 0.77, "grad_norm": 0.2890625, "learning_rate": 4.745847781771303e-05, "loss": 2.2405, "step": 5746 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.7457547894303904e-05, "loss": 2.2748, "step": 5747 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.745661780991409e-05, "loss": 2.2946, "step": 5748 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.745568756455024e-05, "loss": 2.2972, "step": 5749 }, { "epoch": 0.77, "grad_norm": 0.271484375, "learning_rate": 4.745475715821904e-05, "loss": 2.2664, "step": 5750 }, { "epoch": 0.77, "grad_norm": 0.28125, "learning_rate": 4.745382659092715e-05, "loss": 2.2885, "step": 5751 }, { "epoch": 0.77, "grad_norm": 0.3359375, "learning_rate": 4.745289586268124e-05, "loss": 2.2728, "step": 5752 }, { "epoch": 0.77, "grad_norm": 0.27734375, "learning_rate": 4.7451964973487974e-05, "loss": 2.2444, "step": 5753 }, { "epoch": 0.77, "grad_norm": 0.298828125, "learning_rate": 4.7451033923354025e-05, "loss": 2.2246, "step": 5754 }, { "epoch": 0.77, "grad_norm": 0.29296875, "learning_rate": 4.7450102712286086e-05, "loss": 2.3223, "step": 5755 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.744917134029082e-05, "loss": 2.2722, "step": 5756 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.74482398073749e-05, "loss": 2.2631, "step": 5757 }, { "epoch": 0.77, "grad_norm": 0.27734375, "learning_rate": 4.744730811354501e-05, "loss": 2.2547, "step": 5758 }, { "epoch": 0.77, "grad_norm": 0.275390625, "learning_rate": 4.744637625880782e-05, "loss": 2.2628, "step": 5759 }, { "epoch": 0.77, "grad_norm": 0.29296875, "learning_rate": 4.7445444243170025e-05, "loss": 2.2488, "step": 5760 }, { "epoch": 0.77, "grad_norm": 0.298828125, "learning_rate": 4.7444512066638293e-05, "loss": 2.2646, "step": 5761 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.74435797292193e-05, "loss": 2.2901, "step": 5762 }, { "epoch": 0.77, "grad_norm": 0.279296875, "learning_rate": 4.744264723091974e-05, "loss": 2.2588, "step": 5763 }, { "epoch": 0.77, "grad_norm": 0.30078125, "learning_rate": 4.744171457174631e-05, "loss": 2.2712, "step": 5764 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.7440781751705674e-05, "loss": 2.2834, "step": 5765 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.743984877080452e-05, "loss": 2.2801, "step": 5766 }, { "epoch": 0.77, "grad_norm": 0.275390625, "learning_rate": 4.7438915629049545e-05, "loss": 2.2905, "step": 5767 }, { "epoch": 0.77, "grad_norm": 0.30078125, "learning_rate": 4.743798232644744e-05, "loss": 2.2945, "step": 5768 }, { "epoch": 0.77, "grad_norm": 0.306640625, "learning_rate": 4.743704886300488e-05, "loss": 2.2474, "step": 5769 }, { "epoch": 0.77, "grad_norm": 0.30078125, "learning_rate": 4.743611523872857e-05, "loss": 2.273, "step": 5770 }, { "epoch": 0.77, "grad_norm": 0.287109375, "learning_rate": 4.743518145362521e-05, "loss": 2.2777, "step": 5771 }, { "epoch": 0.77, "grad_norm": 0.3046875, "learning_rate": 4.743424750770147e-05, "loss": 2.2754, "step": 5772 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.7433313400964066e-05, "loss": 2.2894, "step": 5773 }, { "epoch": 0.77, "grad_norm": 0.283203125, "learning_rate": 4.743237913341967e-05, "loss": 2.2884, "step": 5774 }, { "epoch": 0.77, "grad_norm": 0.283203125, "learning_rate": 4.7431444705075004e-05, "loss": 2.2566, "step": 5775 }, { "epoch": 0.77, "grad_norm": 0.30078125, "learning_rate": 4.7430510115936754e-05, "loss": 2.2766, "step": 5776 }, { "epoch": 0.77, "grad_norm": 0.275390625, "learning_rate": 4.7429575366011626e-05, "loss": 2.2493, "step": 5777 }, { "epoch": 0.77, "grad_norm": 0.30859375, "learning_rate": 4.74286404553063e-05, "loss": 2.2375, "step": 5778 }, { "epoch": 0.77, "grad_norm": 0.283203125, "learning_rate": 4.742770538382751e-05, "loss": 2.2701, "step": 5779 }, { "epoch": 0.77, "grad_norm": 0.306640625, "learning_rate": 4.742677015158194e-05, "loss": 2.2576, "step": 5780 }, { "epoch": 0.77, "grad_norm": 0.302734375, "learning_rate": 4.7425834758576296e-05, "loss": 2.2592, "step": 5781 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.742489920481728e-05, "loss": 2.2921, "step": 5782 }, { "epoch": 0.77, "grad_norm": 0.2890625, "learning_rate": 4.7423963490311605e-05, "loss": 2.2614, "step": 5783 }, { "epoch": 0.77, "grad_norm": 0.2734375, "learning_rate": 4.742302761506597e-05, "loss": 2.2845, "step": 5784 }, { "epoch": 0.77, "grad_norm": 0.3046875, "learning_rate": 4.742209157908709e-05, "loss": 2.2814, "step": 5785 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.742115538238168e-05, "loss": 2.2681, "step": 5786 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.742021902495644e-05, "loss": 2.2593, "step": 5787 }, { "epoch": 0.77, "grad_norm": 0.2890625, "learning_rate": 4.7419282506818084e-05, "loss": 2.2131, "step": 5788 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.7418345827973326e-05, "loss": 2.2947, "step": 5789 }, { "epoch": 0.77, "grad_norm": 0.31640625, "learning_rate": 4.741740898842889e-05, "loss": 2.3101, "step": 5790 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.7416471988191465e-05, "loss": 2.2139, "step": 5791 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.7415534827267806e-05, "loss": 2.2762, "step": 5792 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.7414597505664606e-05, "loss": 2.2566, "step": 5793 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.7413660023388586e-05, "loss": 2.2322, "step": 5794 }, { "epoch": 0.77, "grad_norm": 0.2890625, "learning_rate": 4.741272238044646e-05, "loss": 2.2223, "step": 5795 }, { "epoch": 0.77, "grad_norm": 0.279296875, "learning_rate": 4.741178457684497e-05, "loss": 2.2633, "step": 5796 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.741084661259082e-05, "loss": 2.2839, "step": 5797 }, { "epoch": 0.77, "grad_norm": 0.296875, "learning_rate": 4.740990848769075e-05, "loss": 2.2348, "step": 5798 }, { "epoch": 0.77, "grad_norm": 0.302734375, "learning_rate": 4.740897020215146e-05, "loss": 2.2901, "step": 5799 }, { "epoch": 0.77, "grad_norm": 0.27734375, "learning_rate": 4.74080317559797e-05, "loss": 2.263, "step": 5800 }, { "epoch": 0.77, "grad_norm": 0.302734375, "learning_rate": 4.740709314918218e-05, "loss": 2.2652, "step": 5801 }, { "epoch": 0.77, "grad_norm": 0.306640625, "learning_rate": 4.740615438176564e-05, "loss": 2.2502, "step": 5802 }, { "epoch": 0.77, "grad_norm": 0.291015625, "learning_rate": 4.74052154537368e-05, "loss": 2.2581, "step": 5803 }, { "epoch": 0.77, "grad_norm": 0.294921875, "learning_rate": 4.74042763651024e-05, "loss": 2.2712, "step": 5804 }, { "epoch": 0.77, "grad_norm": 0.279296875, "learning_rate": 4.740333711586916e-05, "loss": 2.2563, "step": 5805 }, { "epoch": 0.77, "grad_norm": 0.283203125, "learning_rate": 4.7402397706043836e-05, "loss": 2.2707, "step": 5806 }, { "epoch": 0.77, "grad_norm": 0.314453125, "learning_rate": 4.7401458135633124e-05, "loss": 2.2446, "step": 5807 }, { "epoch": 0.77, "grad_norm": 0.28515625, "learning_rate": 4.74005184046438e-05, "loss": 2.2814, "step": 5808 }, { "epoch": 0.77, "grad_norm": 0.302734375, "learning_rate": 4.739957851308256e-05, "loss": 2.2578, "step": 5809 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.739863846095618e-05, "loss": 2.2528, "step": 5810 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.739769824827137e-05, "loss": 2.2739, "step": 5811 }, { "epoch": 0.78, "grad_norm": 0.318359375, "learning_rate": 4.739675787503489e-05, "loss": 2.2719, "step": 5812 }, { "epoch": 0.78, "grad_norm": 0.31640625, "learning_rate": 4.7395817341253465e-05, "loss": 2.261, "step": 5813 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.739487664693384e-05, "loss": 2.3035, "step": 5814 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.7393935792082765e-05, "loss": 2.254, "step": 5815 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.7392994776706975e-05, "loss": 2.2346, "step": 5816 }, { "epoch": 0.78, "grad_norm": 0.3046875, "learning_rate": 4.739205360081323e-05, "loss": 2.2272, "step": 5817 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.739111226440826e-05, "loss": 2.2706, "step": 5818 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.739017076749882e-05, "loss": 2.2417, "step": 5819 }, { "epoch": 0.78, "grad_norm": 0.28125, "learning_rate": 4.738922911009166e-05, "loss": 2.2375, "step": 5820 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.7388287292193535e-05, "loss": 2.2557, "step": 5821 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.738734531381118e-05, "loss": 2.25, "step": 5822 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.738640317495137e-05, "loss": 2.27, "step": 5823 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.738546087562084e-05, "loss": 2.2885, "step": 5824 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.738451841582634e-05, "loss": 2.2768, "step": 5825 }, { "epoch": 0.78, "grad_norm": 0.28125, "learning_rate": 4.738357579557464e-05, "loss": 2.2104, "step": 5826 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.73826330148725e-05, "loss": 2.2778, "step": 5827 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.738169007372667e-05, "loss": 2.2694, "step": 5828 }, { "epoch": 0.78, "grad_norm": 0.283203125, "learning_rate": 4.7380746972143906e-05, "loss": 2.2739, "step": 5829 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.7379803710130974e-05, "loss": 2.261, "step": 5830 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.737886028769464e-05, "loss": 2.2628, "step": 5831 }, { "epoch": 0.78, "grad_norm": 0.283203125, "learning_rate": 4.737791670484165e-05, "loss": 2.2444, "step": 5832 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.7376972961578785e-05, "loss": 2.3133, "step": 5833 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.737602905791281e-05, "loss": 2.2493, "step": 5834 }, { "epoch": 0.78, "grad_norm": 0.3046875, "learning_rate": 4.737508499385047e-05, "loss": 2.2707, "step": 5835 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.7374140769398556e-05, "loss": 2.2406, "step": 5836 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.7373196384563825e-05, "loss": 2.265, "step": 5837 }, { "epoch": 0.78, "grad_norm": 0.27734375, "learning_rate": 4.7372251839353055e-05, "loss": 2.236, "step": 5838 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.7371307133773e-05, "loss": 2.2936, "step": 5839 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.737036226783045e-05, "loss": 2.2521, "step": 5840 }, { "epoch": 0.78, "grad_norm": 0.30078125, "learning_rate": 4.736941724153217e-05, "loss": 2.2565, "step": 5841 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.736847205488493e-05, "loss": 2.2892, "step": 5842 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.7367526707895515e-05, "loss": 2.2278, "step": 5843 }, { "epoch": 0.78, "grad_norm": 0.283203125, "learning_rate": 4.7366581200570696e-05, "loss": 2.2559, "step": 5844 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.736563553291725e-05, "loss": 2.257, "step": 5845 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.736468970494196e-05, "loss": 2.2803, "step": 5846 }, { "epoch": 0.78, "grad_norm": 0.296875, "learning_rate": 4.73637437166516e-05, "loss": 2.2749, "step": 5847 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.7362797568052954e-05, "loss": 2.2797, "step": 5848 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.7361851259152804e-05, "loss": 2.2622, "step": 5849 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.736090478995793e-05, "loss": 2.2639, "step": 5850 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.735995816047512e-05, "loss": 2.2616, "step": 5851 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.735901137071116e-05, "loss": 2.2408, "step": 5852 }, { "epoch": 0.78, "grad_norm": 0.3125, "learning_rate": 4.735806442067284e-05, "loss": 2.2439, "step": 5853 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.735711731036694e-05, "loss": 2.2701, "step": 5854 }, { "epoch": 0.78, "grad_norm": 0.31640625, "learning_rate": 4.735617003980026e-05, "loss": 2.2725, "step": 5855 }, { "epoch": 0.78, "grad_norm": 0.296875, "learning_rate": 4.735522260897958e-05, "loss": 2.2465, "step": 5856 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.735427501791169e-05, "loss": 2.2553, "step": 5857 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.735332726660339e-05, "loss": 2.2841, "step": 5858 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.735237935506148e-05, "loss": 2.2656, "step": 5859 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.735143128329274e-05, "loss": 2.2477, "step": 5860 }, { "epoch": 0.78, "grad_norm": 0.302734375, "learning_rate": 4.735048305130396e-05, "loss": 2.236, "step": 5861 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.734953465910197e-05, "loss": 2.2559, "step": 5862 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.7348586106693535e-05, "loss": 2.2687, "step": 5863 }, { "epoch": 0.78, "grad_norm": 0.283203125, "learning_rate": 4.734763739408546e-05, "loss": 2.2647, "step": 5864 }, { "epoch": 0.78, "grad_norm": 0.294921875, "learning_rate": 4.7346688521284566e-05, "loss": 2.2521, "step": 5865 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.734573948829764e-05, "loss": 2.3026, "step": 5866 }, { "epoch": 0.78, "grad_norm": 0.302734375, "learning_rate": 4.734479029513148e-05, "loss": 2.2748, "step": 5867 }, { "epoch": 0.78, "grad_norm": 0.298828125, "learning_rate": 4.73438409417929e-05, "loss": 2.2757, "step": 5868 }, { "epoch": 0.78, "grad_norm": 0.2890625, "learning_rate": 4.7342891428288704e-05, "loss": 2.2707, "step": 5869 }, { "epoch": 0.78, "grad_norm": 0.306640625, "learning_rate": 4.734194175462569e-05, "loss": 2.2535, "step": 5870 }, { "epoch": 0.78, "grad_norm": 0.310546875, "learning_rate": 4.734099192081067e-05, "loss": 2.2966, "step": 5871 }, { "epoch": 0.78, "grad_norm": 0.296875, "learning_rate": 4.7340041926850464e-05, "loss": 2.2412, "step": 5872 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.733909177275186e-05, "loss": 2.3083, "step": 5873 }, { "epoch": 0.78, "grad_norm": 0.3125, "learning_rate": 4.733814145852169e-05, "loss": 2.2747, "step": 5874 }, { "epoch": 0.78, "grad_norm": 0.28515625, "learning_rate": 4.7337190984166745e-05, "loss": 2.2898, "step": 5875 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.733624034969386e-05, "loss": 2.2583, "step": 5876 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.733528955510983e-05, "loss": 2.2314, "step": 5877 }, { "epoch": 0.78, "grad_norm": 0.287109375, "learning_rate": 4.733433860042148e-05, "loss": 2.2785, "step": 5878 }, { "epoch": 0.78, "grad_norm": 0.29296875, "learning_rate": 4.733338748563564e-05, "loss": 2.2538, "step": 5879 }, { "epoch": 0.78, "grad_norm": 0.302734375, "learning_rate": 4.73324362107591e-05, "loss": 2.2818, "step": 5880 }, { "epoch": 0.78, "grad_norm": 0.3046875, "learning_rate": 4.73314847757987e-05, "loss": 2.2789, "step": 5881 }, { "epoch": 0.78, "grad_norm": 0.291015625, "learning_rate": 4.733053318076125e-05, "loss": 2.2773, "step": 5882 }, { "epoch": 0.78, "grad_norm": 0.30859375, "learning_rate": 4.7329581425653575e-05, "loss": 2.2944, "step": 5883 }, { "epoch": 0.78, "grad_norm": 0.302734375, "learning_rate": 4.732862951048249e-05, "loss": 2.2841, "step": 5884 }, { "epoch": 0.79, "grad_norm": 0.296875, "learning_rate": 4.7327677435254834e-05, "loss": 2.2699, "step": 5885 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.732672519997742e-05, "loss": 2.2471, "step": 5886 }, { "epoch": 0.79, "grad_norm": 0.31640625, "learning_rate": 4.732577280465708e-05, "loss": 2.3022, "step": 5887 }, { "epoch": 0.79, "grad_norm": 0.29296875, "learning_rate": 4.732482024930064e-05, "loss": 2.2465, "step": 5888 }, { "epoch": 0.79, "grad_norm": 0.30859375, "learning_rate": 4.732386753391491e-05, "loss": 2.242, "step": 5889 }, { "epoch": 0.79, "grad_norm": 0.29296875, "learning_rate": 4.732291465850675e-05, "loss": 2.2883, "step": 5890 }, { "epoch": 0.79, "grad_norm": 0.3046875, "learning_rate": 4.732196162308297e-05, "loss": 2.2604, "step": 5891 }, { "epoch": 0.79, "grad_norm": 0.28515625, "learning_rate": 4.732100842765041e-05, "loss": 2.2605, "step": 5892 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.73200550722159e-05, "loss": 2.2448, "step": 5893 }, { "epoch": 0.79, "grad_norm": 0.3046875, "learning_rate": 4.7319101556786274e-05, "loss": 2.2885, "step": 5894 }, { "epoch": 0.79, "grad_norm": 0.28125, "learning_rate": 4.7318147881368355e-05, "loss": 2.2739, "step": 5895 }, { "epoch": 0.79, "grad_norm": 0.28515625, "learning_rate": 4.7317194045969005e-05, "loss": 2.2698, "step": 5896 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.731624005059504e-05, "loss": 2.2441, "step": 5897 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.7315285895253316e-05, "loss": 2.2374, "step": 5898 }, { "epoch": 0.79, "grad_norm": 0.302734375, "learning_rate": 4.731433157995065e-05, "loss": 2.2946, "step": 5899 }, { "epoch": 0.79, "grad_norm": 0.28515625, "learning_rate": 4.73133771046939e-05, "loss": 2.2487, "step": 5900 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.73124224694899e-05, "loss": 2.2564, "step": 5901 }, { "epoch": 0.79, "grad_norm": 0.30078125, "learning_rate": 4.73114676743455e-05, "loss": 2.2845, "step": 5902 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.731051271926754e-05, "loss": 2.254, "step": 5903 }, { "epoch": 0.79, "grad_norm": 0.28515625, "learning_rate": 4.730955760426286e-05, "loss": 2.2544, "step": 5904 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.730860232933831e-05, "loss": 2.2664, "step": 5905 }, { "epoch": 0.79, "grad_norm": 0.298828125, "learning_rate": 4.730764689450075e-05, "loss": 2.2822, "step": 5906 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 4.730669129975701e-05, "loss": 2.262, "step": 5907 }, { "epoch": 0.79, "grad_norm": 0.287109375, "learning_rate": 4.7305735545113954e-05, "loss": 2.2375, "step": 5908 }, { "epoch": 0.79, "grad_norm": 0.302734375, "learning_rate": 4.7304779630578425e-05, "loss": 2.2688, "step": 5909 }, { "epoch": 0.79, "grad_norm": 0.298828125, "learning_rate": 4.730382355615728e-05, "loss": 2.2479, "step": 5910 }, { "epoch": 0.79, "grad_norm": 0.306640625, "learning_rate": 4.730286732185736e-05, "loss": 2.2612, "step": 5911 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.730191092768554e-05, "loss": 2.2565, "step": 5912 }, { "epoch": 0.79, "grad_norm": 0.318359375, "learning_rate": 4.730095437364866e-05, "loss": 2.2743, "step": 5913 }, { "epoch": 0.79, "grad_norm": 0.3046875, "learning_rate": 4.7299997659753586e-05, "loss": 2.2669, "step": 5914 }, { "epoch": 0.79, "grad_norm": 0.310546875, "learning_rate": 4.729904078600716e-05, "loss": 2.2729, "step": 5915 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.7298083752416264e-05, "loss": 2.255, "step": 5916 }, { "epoch": 0.79, "grad_norm": 0.27734375, "learning_rate": 4.729712655898775e-05, "loss": 2.2709, "step": 5917 }, { "epoch": 0.79, "grad_norm": 0.296875, "learning_rate": 4.729616920572847e-05, "loss": 2.2382, "step": 5918 }, { "epoch": 0.79, "grad_norm": 0.333984375, "learning_rate": 4.729521169264529e-05, "loss": 2.2807, "step": 5919 }, { "epoch": 0.79, "grad_norm": 0.28125, "learning_rate": 4.729425401974508e-05, "loss": 2.232, "step": 5920 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.72932961870347e-05, "loss": 2.2696, "step": 5921 }, { "epoch": 0.79, "grad_norm": 0.27734375, "learning_rate": 4.729233819452103e-05, "loss": 2.239, "step": 5922 }, { "epoch": 0.79, "grad_norm": 0.279296875, "learning_rate": 4.72913800422109e-05, "loss": 2.2795, "step": 5923 }, { "epoch": 0.79, "grad_norm": 0.3046875, "learning_rate": 4.7290421730111224e-05, "loss": 2.2502, "step": 5924 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.7289463258228837e-05, "loss": 2.2666, "step": 5925 }, { "epoch": 0.79, "grad_norm": 0.28125, "learning_rate": 4.728850462657063e-05, "loss": 2.2673, "step": 5926 }, { "epoch": 0.79, "grad_norm": 0.314453125, "learning_rate": 4.7287545835143465e-05, "loss": 2.2951, "step": 5927 }, { "epoch": 0.79, "grad_norm": 0.275390625, "learning_rate": 4.7286586883954216e-05, "loss": 2.2643, "step": 5928 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.7285627773009756e-05, "loss": 2.2705, "step": 5929 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 4.728466850231696e-05, "loss": 2.2535, "step": 5930 }, { "epoch": 0.79, "grad_norm": 0.279296875, "learning_rate": 4.728370907188272e-05, "loss": 2.2346, "step": 5931 }, { "epoch": 0.79, "grad_norm": 0.27734375, "learning_rate": 4.728274948171389e-05, "loss": 2.2625, "step": 5932 }, { "epoch": 0.79, "grad_norm": 0.29296875, "learning_rate": 4.728178973181736e-05, "loss": 2.2484, "step": 5933 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 4.7280829822200004e-05, "loss": 2.2549, "step": 5934 }, { "epoch": 0.79, "grad_norm": 0.302734375, "learning_rate": 4.727986975286871e-05, "loss": 2.2519, "step": 5935 }, { "epoch": 0.79, "grad_norm": 0.302734375, "learning_rate": 4.727890952383036e-05, "loss": 2.268, "step": 5936 }, { "epoch": 0.79, "grad_norm": 0.30078125, "learning_rate": 4.727794913509184e-05, "loss": 2.2529, "step": 5937 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 4.7276988586660015e-05, "loss": 2.248, "step": 5938 }, { "epoch": 0.79, "grad_norm": 0.28515625, "learning_rate": 4.727602787854179e-05, "loss": 2.2722, "step": 5939 }, { "epoch": 0.79, "grad_norm": 0.287109375, "learning_rate": 4.7275067010744044e-05, "loss": 2.2243, "step": 5940 }, { "epoch": 0.79, "grad_norm": 0.296875, "learning_rate": 4.7274105983273666e-05, "loss": 2.294, "step": 5941 }, { "epoch": 0.79, "grad_norm": 0.30859375, "learning_rate": 4.727314479613755e-05, "loss": 2.2546, "step": 5942 }, { "epoch": 0.79, "grad_norm": 0.30078125, "learning_rate": 4.7272183449342574e-05, "loss": 2.265, "step": 5943 }, { "epoch": 0.79, "grad_norm": 0.296875, "learning_rate": 4.7271221942895637e-05, "loss": 2.2296, "step": 5944 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.7270260276803636e-05, "loss": 2.2734, "step": 5945 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.726929845107345e-05, "loss": 2.2726, "step": 5946 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.726833646571199e-05, "loss": 2.2733, "step": 5947 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.726737432072614e-05, "loss": 2.254, "step": 5948 }, { "epoch": 0.79, "grad_norm": 0.30859375, "learning_rate": 4.72664120161228e-05, "loss": 2.2377, "step": 5949 }, { "epoch": 0.79, "grad_norm": 0.283203125, "learning_rate": 4.726544955190888e-05, "loss": 2.276, "step": 5950 }, { "epoch": 0.79, "grad_norm": 0.29296875, "learning_rate": 4.726448692809126e-05, "loss": 2.2303, "step": 5951 }, { "epoch": 0.79, "grad_norm": 0.2890625, "learning_rate": 4.7263524144676844e-05, "loss": 2.2983, "step": 5952 }, { "epoch": 0.79, "grad_norm": 0.294921875, "learning_rate": 4.726256120167254e-05, "loss": 2.2559, "step": 5953 }, { "epoch": 0.79, "grad_norm": 0.291015625, "learning_rate": 4.7261598099085246e-05, "loss": 2.2456, "step": 5954 }, { "epoch": 0.79, "grad_norm": 0.287109375, "learning_rate": 4.726063483692188e-05, "loss": 2.2371, "step": 5955 }, { "epoch": 0.79, "grad_norm": 0.287109375, "learning_rate": 4.7259671415189325e-05, "loss": 2.2741, "step": 5956 }, { "epoch": 0.79, "grad_norm": 0.279296875, "learning_rate": 4.72587078338945e-05, "loss": 2.2597, "step": 5957 }, { "epoch": 0.79, "grad_norm": 0.29296875, "learning_rate": 4.7257744093044306e-05, "loss": 2.2431, "step": 5958 }, { "epoch": 0.79, "grad_norm": 0.30078125, "learning_rate": 4.725678019264567e-05, "loss": 2.2731, "step": 5959 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.7255816132705465e-05, "loss": 2.2268, "step": 5960 }, { "epoch": 0.8, "grad_norm": 0.306640625, "learning_rate": 4.725485191323064e-05, "loss": 2.2355, "step": 5961 }, { "epoch": 0.8, "grad_norm": 0.3046875, "learning_rate": 4.7253887534228074e-05, "loss": 2.2522, "step": 5962 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.725292299570471e-05, "loss": 2.285, "step": 5963 }, { "epoch": 0.8, "grad_norm": 0.279296875, "learning_rate": 4.725195829766743e-05, "loss": 2.2657, "step": 5964 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.7250993440123184e-05, "loss": 2.2336, "step": 5965 }, { "epoch": 0.8, "grad_norm": 0.30078125, "learning_rate": 4.725002842307886e-05, "loss": 2.2334, "step": 5966 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.7249063246541394e-05, "loss": 2.2812, "step": 5967 }, { "epoch": 0.8, "grad_norm": 0.2890625, "learning_rate": 4.7248097910517685e-05, "loss": 2.2595, "step": 5968 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.724713241501467e-05, "loss": 2.2421, "step": 5969 }, { "epoch": 0.8, "grad_norm": 0.310546875, "learning_rate": 4.724616676003927e-05, "loss": 2.2533, "step": 5970 }, { "epoch": 0.8, "grad_norm": 0.306640625, "learning_rate": 4.72452009455984e-05, "loss": 2.2814, "step": 5971 }, { "epoch": 0.8, "grad_norm": 0.2890625, "learning_rate": 4.724423497169899e-05, "loss": 2.3019, "step": 5972 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.724326883834794e-05, "loss": 2.2552, "step": 5973 }, { "epoch": 0.8, "grad_norm": 0.291015625, "learning_rate": 4.724230254555221e-05, "loss": 2.229, "step": 5974 }, { "epoch": 0.8, "grad_norm": 0.291015625, "learning_rate": 4.7241336093318714e-05, "loss": 2.2783, "step": 5975 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.7240369481654367e-05, "loss": 2.2702, "step": 5976 }, { "epoch": 0.8, "grad_norm": 0.318359375, "learning_rate": 4.7239402710566106e-05, "loss": 2.2389, "step": 5977 }, { "epoch": 0.8, "grad_norm": 0.294921875, "learning_rate": 4.723843578006087e-05, "loss": 2.2475, "step": 5978 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.723746869014558e-05, "loss": 2.2687, "step": 5979 }, { "epoch": 0.8, "grad_norm": 0.29296875, "learning_rate": 4.7236501440827174e-05, "loss": 2.2571, "step": 5980 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.723553403211258e-05, "loss": 2.2944, "step": 5981 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.723456646400873e-05, "loss": 2.2577, "step": 5982 }, { "epoch": 0.8, "grad_norm": 0.2890625, "learning_rate": 4.723359873652258e-05, "loss": 2.2583, "step": 5983 }, { "epoch": 0.8, "grad_norm": 0.3203125, "learning_rate": 4.723263084966104e-05, "loss": 2.2409, "step": 5984 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.723166280343106e-05, "loss": 2.2761, "step": 5985 }, { "epoch": 0.8, "grad_norm": 0.291015625, "learning_rate": 4.723069459783958e-05, "loss": 2.2603, "step": 5986 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.722972623289354e-05, "loss": 2.2306, "step": 5987 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.7228757708599886e-05, "loss": 2.2536, "step": 5988 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.722778902496555e-05, "loss": 2.2682, "step": 5989 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.7226820181997485e-05, "loss": 2.2548, "step": 5990 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.722585117970263e-05, "loss": 2.2479, "step": 5991 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.7224882018087934e-05, "loss": 2.2717, "step": 5992 }, { "epoch": 0.8, "grad_norm": 0.294921875, "learning_rate": 4.722391269716034e-05, "loss": 2.2784, "step": 5993 }, { "epoch": 0.8, "grad_norm": 0.27734375, "learning_rate": 4.72229432169268e-05, "loss": 2.2686, "step": 5994 }, { "epoch": 0.8, "grad_norm": 0.29296875, "learning_rate": 4.722197357739426e-05, "loss": 2.2617, "step": 5995 }, { "epoch": 0.8, "grad_norm": 0.302734375, "learning_rate": 4.722100377856968e-05, "loss": 2.2512, "step": 5996 }, { "epoch": 0.8, "grad_norm": 0.28125, "learning_rate": 4.722003382046001e-05, "loss": 2.2572, "step": 5997 }, { "epoch": 0.8, "grad_norm": 0.29296875, "learning_rate": 4.721906370307219e-05, "loss": 2.2632, "step": 5998 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.7218093426413186e-05, "loss": 2.2739, "step": 5999 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.721712299048995e-05, "loss": 2.2654, "step": 6000 }, { "epoch": 0.8, "eval_loss": 2.2597622871398926, "eval_runtime": 615.0597, "eval_samples_per_second": 63.036, "eval_steps_per_second": 7.881, "step": 6000 }, { "epoch": 0.8, "grad_norm": 0.28125, "learning_rate": 4.7216152395309435e-05, "loss": 2.2854, "step": 6001 }, { "epoch": 0.8, "grad_norm": 0.2890625, "learning_rate": 4.72151816408786e-05, "loss": 2.2725, "step": 6002 }, { "epoch": 0.8, "grad_norm": 0.3046875, "learning_rate": 4.72142107272044e-05, "loss": 2.2403, "step": 6003 }, { "epoch": 0.8, "grad_norm": 0.310546875, "learning_rate": 4.721323965429381e-05, "loss": 2.2647, "step": 6004 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.7212268422153775e-05, "loss": 2.2748, "step": 6005 }, { "epoch": 0.8, "grad_norm": 0.310546875, "learning_rate": 4.7211297030791265e-05, "loss": 2.2727, "step": 6006 }, { "epoch": 0.8, "grad_norm": 0.29296875, "learning_rate": 4.721032548021324e-05, "loss": 2.2811, "step": 6007 }, { "epoch": 0.8, "grad_norm": 0.294921875, "learning_rate": 4.7209353770426665e-05, "loss": 2.2469, "step": 6008 }, { "epoch": 0.8, "grad_norm": 0.3125, "learning_rate": 4.7208381901438505e-05, "loss": 2.2513, "step": 6009 }, { "epoch": 0.8, "grad_norm": 0.298828125, "learning_rate": 4.720740987325573e-05, "loss": 2.2724, "step": 6010 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.72064376858853e-05, "loss": 2.2621, "step": 6011 }, { "epoch": 0.8, "grad_norm": 0.310546875, "learning_rate": 4.7205465339334184e-05, "loss": 2.2716, "step": 6012 }, { "epoch": 0.8, "grad_norm": 0.302734375, "learning_rate": 4.720449283360936e-05, "loss": 2.2524, "step": 6013 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.7203520168717793e-05, "loss": 2.2573, "step": 6014 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.7202547344666464e-05, "loss": 2.2777, "step": 6015 }, { "epoch": 0.8, "grad_norm": 0.302734375, "learning_rate": 4.720157436146234e-05, "loss": 2.2558, "step": 6016 }, { "epoch": 0.8, "grad_norm": 0.27734375, "learning_rate": 4.720060121911239e-05, "loss": 2.2448, "step": 6017 }, { "epoch": 0.8, "grad_norm": 0.283203125, "learning_rate": 4.7199627917623604e-05, "loss": 2.2541, "step": 6018 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.7198654457002946e-05, "loss": 2.2534, "step": 6019 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.71976808372574e-05, "loss": 2.2494, "step": 6020 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.719670705839393e-05, "loss": 2.2516, "step": 6021 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.7195733120419547e-05, "loss": 2.2533, "step": 6022 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.7194759023341204e-05, "loss": 2.2686, "step": 6023 }, { "epoch": 0.8, "grad_norm": 0.28515625, "learning_rate": 4.71937847671659e-05, "loss": 2.2721, "step": 6024 }, { "epoch": 0.8, "grad_norm": 0.287109375, "learning_rate": 4.719281035190061e-05, "loss": 2.3096, "step": 6025 }, { "epoch": 0.8, "grad_norm": 0.3046875, "learning_rate": 4.719183577755233e-05, "loss": 2.264, "step": 6026 }, { "epoch": 0.8, "grad_norm": 0.28125, "learning_rate": 4.7190861044128034e-05, "loss": 2.2468, "step": 6027 }, { "epoch": 0.8, "grad_norm": 0.30078125, "learning_rate": 4.718988615163471e-05, "loss": 2.2707, "step": 6028 }, { "epoch": 0.8, "grad_norm": 0.302734375, "learning_rate": 4.718891110007935e-05, "loss": 2.2474, "step": 6029 }, { "epoch": 0.8, "grad_norm": 0.302734375, "learning_rate": 4.7187935889468944e-05, "loss": 2.2366, "step": 6030 }, { "epoch": 0.8, "grad_norm": 0.31640625, "learning_rate": 4.718696051981048e-05, "loss": 2.2992, "step": 6031 }, { "epoch": 0.8, "grad_norm": 0.296875, "learning_rate": 4.7185984991110956e-05, "loss": 2.2608, "step": 6032 }, { "epoch": 0.8, "grad_norm": 0.30859375, "learning_rate": 4.718500930337735e-05, "loss": 2.2922, "step": 6033 }, { "epoch": 0.8, "grad_norm": 0.2890625, "learning_rate": 4.718403345661667e-05, "loss": 2.2491, "step": 6034 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.718305745083591e-05, "loss": 2.2489, "step": 6035 }, { "epoch": 0.81, "grad_norm": 0.302734375, "learning_rate": 4.718208128604207e-05, "loss": 2.2729, "step": 6036 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.7181104962242126e-05, "loss": 2.2448, "step": 6037 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.71801284794431e-05, "loss": 2.2675, "step": 6038 }, { "epoch": 0.81, "grad_norm": 0.287109375, "learning_rate": 4.717915183765198e-05, "loss": 2.27, "step": 6039 }, { "epoch": 0.81, "grad_norm": 0.306640625, "learning_rate": 4.717817503687577e-05, "loss": 2.2386, "step": 6040 }, { "epoch": 0.81, "grad_norm": 0.2734375, "learning_rate": 4.717719807712146e-05, "loss": 2.2613, "step": 6041 }, { "epoch": 0.81, "grad_norm": 0.279296875, "learning_rate": 4.717622095839608e-05, "loss": 2.2515, "step": 6042 }, { "epoch": 0.81, "grad_norm": 0.3046875, "learning_rate": 4.7175243680706616e-05, "loss": 2.2424, "step": 6043 }, { "epoch": 0.81, "grad_norm": 0.2734375, "learning_rate": 4.7174266244060076e-05, "loss": 2.2601, "step": 6044 }, { "epoch": 0.81, "grad_norm": 0.287109375, "learning_rate": 4.7173288648463466e-05, "loss": 2.2466, "step": 6045 }, { "epoch": 0.81, "grad_norm": 0.302734375, "learning_rate": 4.7172310893923786e-05, "loss": 2.2231, "step": 6046 }, { "epoch": 0.81, "grad_norm": 0.28515625, "learning_rate": 4.717133298044806e-05, "loss": 2.2625, "step": 6047 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.717035490804329e-05, "loss": 2.2732, "step": 6048 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.7169376676716486e-05, "loss": 2.2513, "step": 6049 }, { "epoch": 0.81, "grad_norm": 0.279296875, "learning_rate": 4.716839828647466e-05, "loss": 2.2859, "step": 6050 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.716741973732484e-05, "loss": 2.2583, "step": 6051 }, { "epoch": 0.81, "grad_norm": 0.296875, "learning_rate": 4.716644102927401e-05, "loss": 2.2708, "step": 6052 }, { "epoch": 0.81, "grad_norm": 0.294921875, "learning_rate": 4.716546216232921e-05, "loss": 2.2774, "step": 6053 }, { "epoch": 0.81, "grad_norm": 0.30078125, "learning_rate": 4.716448313649745e-05, "loss": 2.269, "step": 6054 }, { "epoch": 0.81, "grad_norm": 0.302734375, "learning_rate": 4.7163503951785754e-05, "loss": 2.2485, "step": 6055 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.716252460820113e-05, "loss": 2.2214, "step": 6056 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.71615451057506e-05, "loss": 2.2483, "step": 6057 }, { "epoch": 0.81, "grad_norm": 0.30078125, "learning_rate": 4.71605654444412e-05, "loss": 2.2278, "step": 6058 }, { "epoch": 0.81, "grad_norm": 0.3046875, "learning_rate": 4.7159585624279925e-05, "loss": 2.2307, "step": 6059 }, { "epoch": 0.81, "grad_norm": 0.283203125, "learning_rate": 4.7158605645273826e-05, "loss": 2.2195, "step": 6060 }, { "epoch": 0.81, "grad_norm": 0.271484375, "learning_rate": 4.7157625507429906e-05, "loss": 2.2546, "step": 6061 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.715664521075521e-05, "loss": 2.246, "step": 6062 }, { "epoch": 0.81, "grad_norm": 0.271484375, "learning_rate": 4.715566475525675e-05, "loss": 2.2925, "step": 6063 }, { "epoch": 0.81, "grad_norm": 0.296875, "learning_rate": 4.715468414094156e-05, "loss": 2.2619, "step": 6064 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.7153703367816675e-05, "loss": 2.2598, "step": 6065 }, { "epoch": 0.81, "grad_norm": 0.298828125, "learning_rate": 4.715272243588912e-05, "loss": 2.2222, "step": 6066 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.715174134516592e-05, "loss": 2.2653, "step": 6067 }, { "epoch": 0.81, "grad_norm": 0.30078125, "learning_rate": 4.7150760095654116e-05, "loss": 2.3005, "step": 6068 }, { "epoch": 0.81, "grad_norm": 0.27734375, "learning_rate": 4.714977868736074e-05, "loss": 2.2699, "step": 6069 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.714879712029283e-05, "loss": 2.2658, "step": 6070 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.714781539445741e-05, "loss": 2.2528, "step": 6071 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.714683350986153e-05, "loss": 2.2606, "step": 6072 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.714585146651223e-05, "loss": 2.2663, "step": 6073 }, { "epoch": 0.81, "grad_norm": 0.287109375, "learning_rate": 4.714486926441654e-05, "loss": 2.2255, "step": 6074 }, { "epoch": 0.81, "grad_norm": 0.294921875, "learning_rate": 4.71438869035815e-05, "loss": 2.2593, "step": 6075 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.714290438401417e-05, "loss": 2.2572, "step": 6076 }, { "epoch": 0.81, "grad_norm": 0.3125, "learning_rate": 4.714192170572157e-05, "loss": 2.2559, "step": 6077 }, { "epoch": 0.81, "grad_norm": 0.302734375, "learning_rate": 4.714093886871075e-05, "loss": 2.2685, "step": 6078 }, { "epoch": 0.81, "grad_norm": 0.296875, "learning_rate": 4.7139955872988764e-05, "loss": 2.2491, "step": 6079 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.713897271856265e-05, "loss": 2.2601, "step": 6080 }, { "epoch": 0.81, "grad_norm": 0.28515625, "learning_rate": 4.7137989405439454e-05, "loss": 2.2967, "step": 6081 }, { "epoch": 0.81, "grad_norm": 0.3125, "learning_rate": 4.713700593362623e-05, "loss": 2.2542, "step": 6082 }, { "epoch": 0.81, "grad_norm": 0.302734375, "learning_rate": 4.7136022303130036e-05, "loss": 2.2637, "step": 6083 }, { "epoch": 0.81, "grad_norm": 0.287109375, "learning_rate": 4.713503851395791e-05, "loss": 2.2604, "step": 6084 }, { "epoch": 0.81, "grad_norm": 0.3046875, "learning_rate": 4.71340545661169e-05, "loss": 2.2357, "step": 6085 }, { "epoch": 0.81, "grad_norm": 0.27734375, "learning_rate": 4.713307045961407e-05, "loss": 2.2502, "step": 6086 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.713208619445647e-05, "loss": 2.26, "step": 6087 }, { "epoch": 0.81, "grad_norm": 0.298828125, "learning_rate": 4.7131101770651165e-05, "loss": 2.2776, "step": 6088 }, { "epoch": 0.81, "grad_norm": 0.3046875, "learning_rate": 4.713011718820519e-05, "loss": 2.2377, "step": 6089 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.712913244712562e-05, "loss": 2.2784, "step": 6090 }, { "epoch": 0.81, "grad_norm": 0.296875, "learning_rate": 4.712814754741952e-05, "loss": 2.2657, "step": 6091 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.712716248909393e-05, "loss": 2.2144, "step": 6092 }, { "epoch": 0.81, "grad_norm": 0.30859375, "learning_rate": 4.712617727215593e-05, "loss": 2.282, "step": 6093 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.712519189661255e-05, "loss": 2.2254, "step": 6094 }, { "epoch": 0.81, "grad_norm": 0.294921875, "learning_rate": 4.71242063624709e-05, "loss": 2.2548, "step": 6095 }, { "epoch": 0.81, "grad_norm": 0.28515625, "learning_rate": 4.712322066973801e-05, "loss": 2.2397, "step": 6096 }, { "epoch": 0.81, "grad_norm": 0.28125, "learning_rate": 4.7122234818420954e-05, "loss": 2.2502, "step": 6097 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.7121248808526806e-05, "loss": 2.2691, "step": 6098 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.712026264006263e-05, "loss": 2.2648, "step": 6099 }, { "epoch": 0.81, "grad_norm": 0.306640625, "learning_rate": 4.7119276313035495e-05, "loss": 2.2856, "step": 6100 }, { "epoch": 0.81, "grad_norm": 0.291015625, "learning_rate": 4.711828982745247e-05, "loss": 2.25, "step": 6101 }, { "epoch": 0.81, "grad_norm": 0.3125, "learning_rate": 4.711730318332063e-05, "loss": 2.2919, "step": 6102 }, { "epoch": 0.81, "grad_norm": 0.28515625, "learning_rate": 4.711631638064703e-05, "loss": 2.2655, "step": 6103 }, { "epoch": 0.81, "grad_norm": 0.296875, "learning_rate": 4.711532941943877e-05, "loss": 2.2648, "step": 6104 }, { "epoch": 0.81, "grad_norm": 0.287109375, "learning_rate": 4.711434229970291e-05, "loss": 2.2767, "step": 6105 }, { "epoch": 0.81, "grad_norm": 0.283203125, "learning_rate": 4.711335502144653e-05, "loss": 2.2811, "step": 6106 }, { "epoch": 0.81, "grad_norm": 0.27734375, "learning_rate": 4.7112367584676706e-05, "loss": 2.2566, "step": 6107 }, { "epoch": 0.81, "grad_norm": 0.29296875, "learning_rate": 4.711137998940051e-05, "loss": 2.2476, "step": 6108 }, { "epoch": 0.81, "grad_norm": 0.2890625, "learning_rate": 4.711039223562504e-05, "loss": 2.2829, "step": 6109 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.7109404323357346e-05, "loss": 2.2573, "step": 6110 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.710841625260454e-05, "loss": 2.2649, "step": 6111 }, { "epoch": 0.82, "grad_norm": 0.279296875, "learning_rate": 4.710742802337368e-05, "loss": 2.3093, "step": 6112 }, { "epoch": 0.82, "grad_norm": 0.296875, "learning_rate": 4.7106439635671874e-05, "loss": 2.2493, "step": 6113 }, { "epoch": 0.82, "grad_norm": 0.306640625, "learning_rate": 4.710545108950619e-05, "loss": 2.3055, "step": 6114 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.710446238488372e-05, "loss": 2.2738, "step": 6115 }, { "epoch": 0.82, "grad_norm": 0.28125, "learning_rate": 4.710347352181155e-05, "loss": 2.2768, "step": 6116 }, { "epoch": 0.82, "grad_norm": 0.29296875, "learning_rate": 4.710248450029676e-05, "loss": 2.2435, "step": 6117 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.7101495320346454e-05, "loss": 2.2663, "step": 6118 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.7100505981967716e-05, "loss": 2.2597, "step": 6119 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.709951648516765e-05, "loss": 2.2547, "step": 6120 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.709852682995332e-05, "loss": 2.2603, "step": 6121 }, { "epoch": 0.82, "grad_norm": 0.283203125, "learning_rate": 4.709753701633184e-05, "loss": 2.2505, "step": 6122 }, { "epoch": 0.82, "grad_norm": 0.291015625, "learning_rate": 4.7096547044310314e-05, "loss": 2.2787, "step": 6123 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.7095556913895814e-05, "loss": 2.2528, "step": 6124 }, { "epoch": 0.82, "grad_norm": 0.30859375, "learning_rate": 4.7094566625095456e-05, "loss": 2.254, "step": 6125 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.709357617791633e-05, "loss": 2.2646, "step": 6126 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.709258557236554e-05, "loss": 2.2482, "step": 6127 }, { "epoch": 0.82, "grad_norm": 0.306640625, "learning_rate": 4.7091594808450195e-05, "loss": 2.2456, "step": 6128 }, { "epoch": 0.82, "grad_norm": 0.29296875, "learning_rate": 4.7090603886177376e-05, "loss": 2.2597, "step": 6129 }, { "epoch": 0.82, "grad_norm": 0.294921875, "learning_rate": 4.708961280555421e-05, "loss": 2.2766, "step": 6130 }, { "epoch": 0.82, "grad_norm": 0.302734375, "learning_rate": 4.708862156658778e-05, "loss": 2.2824, "step": 6131 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.7087630169285194e-05, "loss": 2.2721, "step": 6132 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.708663861365358e-05, "loss": 2.2595, "step": 6133 }, { "epoch": 0.82, "grad_norm": 0.326171875, "learning_rate": 4.708564689970002e-05, "loss": 2.2439, "step": 6134 }, { "epoch": 0.82, "grad_norm": 0.3046875, "learning_rate": 4.708465502743163e-05, "loss": 2.2643, "step": 6135 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.708366299685554e-05, "loss": 2.2772, "step": 6136 }, { "epoch": 0.82, "grad_norm": 0.306640625, "learning_rate": 4.708267080797883e-05, "loss": 2.2688, "step": 6137 }, { "epoch": 0.82, "grad_norm": 0.3125, "learning_rate": 4.708167846080863e-05, "loss": 2.2391, "step": 6138 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.708068595535205e-05, "loss": 2.3165, "step": 6139 }, { "epoch": 0.82, "grad_norm": 0.302734375, "learning_rate": 4.707969329161621e-05, "loss": 2.2358, "step": 6140 }, { "epoch": 0.82, "grad_norm": 0.27734375, "learning_rate": 4.707870046960822e-05, "loss": 2.2311, "step": 6141 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.707770748933519e-05, "loss": 2.2465, "step": 6142 }, { "epoch": 0.82, "grad_norm": 0.279296875, "learning_rate": 4.707671435080425e-05, "loss": 2.3016, "step": 6143 }, { "epoch": 0.82, "grad_norm": 0.30859375, "learning_rate": 4.707572105402251e-05, "loss": 2.2691, "step": 6144 }, { "epoch": 0.82, "grad_norm": 0.306640625, "learning_rate": 4.70747275989971e-05, "loss": 2.2652, "step": 6145 }, { "epoch": 0.82, "grad_norm": 0.30078125, "learning_rate": 4.7073733985735134e-05, "loss": 2.2739, "step": 6146 }, { "epoch": 0.82, "grad_norm": 0.30078125, "learning_rate": 4.707274021424374e-05, "loss": 2.2606, "step": 6147 }, { "epoch": 0.82, "grad_norm": 0.296875, "learning_rate": 4.707174628453003e-05, "loss": 2.2868, "step": 6148 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.7070752196601137e-05, "loss": 2.2989, "step": 6149 }, { "epoch": 0.82, "grad_norm": 0.294921875, "learning_rate": 4.706975795046419e-05, "loss": 2.257, "step": 6150 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.7068763546126307e-05, "loss": 2.2508, "step": 6151 }, { "epoch": 0.82, "grad_norm": 0.296875, "learning_rate": 4.706776898359463e-05, "loss": 2.2765, "step": 6152 }, { "epoch": 0.82, "grad_norm": 0.30078125, "learning_rate": 4.7066774262876265e-05, "loss": 2.2445, "step": 6153 }, { "epoch": 0.82, "grad_norm": 0.30859375, "learning_rate": 4.706577938397837e-05, "loss": 2.2691, "step": 6154 }, { "epoch": 0.82, "grad_norm": 0.302734375, "learning_rate": 4.7064784346908056e-05, "loss": 2.2438, "step": 6155 }, { "epoch": 0.82, "grad_norm": 0.30078125, "learning_rate": 4.706378915167247e-05, "loss": 2.2649, "step": 6156 }, { "epoch": 0.82, "grad_norm": 0.283203125, "learning_rate": 4.7062793798278734e-05, "loss": 2.2484, "step": 6157 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.706179828673399e-05, "loss": 2.2164, "step": 6158 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.706080261704536e-05, "loss": 2.265, "step": 6159 }, { "epoch": 0.82, "grad_norm": 0.296875, "learning_rate": 4.7059806789220004e-05, "loss": 2.2815, "step": 6160 }, { "epoch": 0.82, "grad_norm": 0.29296875, "learning_rate": 4.705881080326505e-05, "loss": 2.2872, "step": 6161 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.705781465918764e-05, "loss": 2.2801, "step": 6162 }, { "epoch": 0.82, "grad_norm": 0.310546875, "learning_rate": 4.7056818356994906e-05, "loss": 2.2607, "step": 6163 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.7055821896693996e-05, "loss": 2.2698, "step": 6164 }, { "epoch": 0.82, "grad_norm": 0.291015625, "learning_rate": 4.705482527829205e-05, "loss": 2.2705, "step": 6165 }, { "epoch": 0.82, "grad_norm": 0.302734375, "learning_rate": 4.705382850179621e-05, "loss": 2.231, "step": 6166 }, { "epoch": 0.82, "grad_norm": 0.30078125, "learning_rate": 4.705283156721363e-05, "loss": 2.2611, "step": 6167 }, { "epoch": 0.82, "grad_norm": 0.291015625, "learning_rate": 4.705183447455145e-05, "loss": 2.2545, "step": 6168 }, { "epoch": 0.82, "grad_norm": 0.291015625, "learning_rate": 4.705083722381682e-05, "loss": 2.2757, "step": 6169 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.704983981501689e-05, "loss": 2.259, "step": 6170 }, { "epoch": 0.82, "grad_norm": 0.306640625, "learning_rate": 4.70488422481588e-05, "loss": 2.2692, "step": 6171 }, { "epoch": 0.82, "grad_norm": 0.291015625, "learning_rate": 4.704784452324971e-05, "loss": 2.2788, "step": 6172 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.704684664029677e-05, "loss": 2.2833, "step": 6173 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.704584859930714e-05, "loss": 2.2445, "step": 6174 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.7044850400287954e-05, "loss": 2.2572, "step": 6175 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.7043852043246386e-05, "loss": 2.2799, "step": 6176 }, { "epoch": 0.82, "grad_norm": 0.2890625, "learning_rate": 4.704285352818958e-05, "loss": 2.249, "step": 6177 }, { "epoch": 0.82, "grad_norm": 0.29296875, "learning_rate": 4.704185485512471e-05, "loss": 2.2692, "step": 6178 }, { "epoch": 0.82, "grad_norm": 0.28515625, "learning_rate": 4.704085602405892e-05, "loss": 2.2413, "step": 6179 }, { "epoch": 0.82, "grad_norm": 0.294921875, "learning_rate": 4.703985703499937e-05, "loss": 2.2728, "step": 6180 }, { "epoch": 0.82, "grad_norm": 0.298828125, "learning_rate": 4.7038857887953235e-05, "loss": 2.2735, "step": 6181 }, { "epoch": 0.82, "grad_norm": 0.294921875, "learning_rate": 4.7037858582927665e-05, "loss": 2.2488, "step": 6182 }, { "epoch": 0.82, "grad_norm": 0.287109375, "learning_rate": 4.703685911992982e-05, "loss": 2.3037, "step": 6183 }, { "epoch": 0.82, "grad_norm": 0.30859375, "learning_rate": 4.703585949896687e-05, "loss": 2.2589, "step": 6184 }, { "epoch": 0.83, "grad_norm": 0.31640625, "learning_rate": 4.7034859720045985e-05, "loss": 2.2731, "step": 6185 }, { "epoch": 0.83, "grad_norm": 0.27734375, "learning_rate": 4.7033859783174325e-05, "loss": 2.2983, "step": 6186 }, { "epoch": 0.83, "grad_norm": 0.302734375, "learning_rate": 4.703285968835907e-05, "loss": 2.2356, "step": 6187 }, { "epoch": 0.83, "grad_norm": 0.298828125, "learning_rate": 4.703185943560737e-05, "loss": 2.2236, "step": 6188 }, { "epoch": 0.83, "grad_norm": 0.306640625, "learning_rate": 4.7030859024926396e-05, "loss": 2.2447, "step": 6189 }, { "epoch": 0.83, "grad_norm": 0.283203125, "learning_rate": 4.702985845632334e-05, "loss": 2.2288, "step": 6190 }, { "epoch": 0.83, "grad_norm": 0.291015625, "learning_rate": 4.702885772980536e-05, "loss": 2.251, "step": 6191 }, { "epoch": 0.83, "grad_norm": 0.28515625, "learning_rate": 4.7027856845379624e-05, "loss": 2.281, "step": 6192 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.702685580305332e-05, "loss": 2.3007, "step": 6193 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.702585460283362e-05, "loss": 2.2667, "step": 6194 }, { "epoch": 0.83, "grad_norm": 0.302734375, "learning_rate": 4.702485324472769e-05, "loss": 2.249, "step": 6195 }, { "epoch": 0.83, "grad_norm": 0.302734375, "learning_rate": 4.702385172874272e-05, "loss": 2.2529, "step": 6196 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.702285005488588e-05, "loss": 2.2756, "step": 6197 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.7021848223164364e-05, "loss": 2.3088, "step": 6198 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.7020846233585335e-05, "loss": 2.2879, "step": 6199 }, { "epoch": 0.83, "grad_norm": 0.28515625, "learning_rate": 4.7019844086156e-05, "loss": 2.2805, "step": 6200 }, { "epoch": 0.83, "grad_norm": 0.29296875, "learning_rate": 4.701884178088351e-05, "loss": 2.2488, "step": 6201 }, { "epoch": 0.83, "grad_norm": 0.291015625, "learning_rate": 4.701783931777508e-05, "loss": 2.2503, "step": 6202 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.701683669683788e-05, "loss": 2.2702, "step": 6203 }, { "epoch": 0.83, "grad_norm": 0.3046875, "learning_rate": 4.70158339180791e-05, "loss": 2.2647, "step": 6204 }, { "epoch": 0.83, "grad_norm": 0.291015625, "learning_rate": 4.701483098150593e-05, "loss": 2.2329, "step": 6205 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.701382788712555e-05, "loss": 2.2492, "step": 6206 }, { "epoch": 0.83, "grad_norm": 0.294921875, "learning_rate": 4.701282463494517e-05, "loss": 2.2732, "step": 6207 }, { "epoch": 0.83, "grad_norm": 0.28515625, "learning_rate": 4.7011821224971965e-05, "loss": 2.248, "step": 6208 }, { "epoch": 0.83, "grad_norm": 0.279296875, "learning_rate": 4.701081765721312e-05, "loss": 2.2901, "step": 6209 }, { "epoch": 0.83, "grad_norm": 0.27734375, "learning_rate": 4.700981393167586e-05, "loss": 2.2759, "step": 6210 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.700881004836736e-05, "loss": 2.287, "step": 6211 }, { "epoch": 0.83, "grad_norm": 0.283203125, "learning_rate": 4.700780600729481e-05, "loss": 2.2897, "step": 6212 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.700680180846541e-05, "loss": 2.2598, "step": 6213 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.7005797451886367e-05, "loss": 2.2656, "step": 6214 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.700479293756488e-05, "loss": 2.2736, "step": 6215 }, { "epoch": 0.83, "grad_norm": 0.302734375, "learning_rate": 4.700378826550814e-05, "loss": 2.2481, "step": 6216 }, { "epoch": 0.83, "grad_norm": 0.279296875, "learning_rate": 4.700278343572336e-05, "loss": 2.3003, "step": 6217 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.700177844821774e-05, "loss": 2.2413, "step": 6218 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.700077330299847e-05, "loss": 2.2761, "step": 6219 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.699976800007277e-05, "loss": 2.2867, "step": 6220 }, { "epoch": 0.83, "grad_norm": 0.28515625, "learning_rate": 4.699876253944785e-05, "loss": 2.2563, "step": 6221 }, { "epoch": 0.83, "grad_norm": 0.30859375, "learning_rate": 4.69977569211309e-05, "loss": 2.2695, "step": 6222 }, { "epoch": 0.83, "grad_norm": 0.30859375, "learning_rate": 4.6996751145129144e-05, "loss": 2.2446, "step": 6223 }, { "epoch": 0.83, "grad_norm": 0.291015625, "learning_rate": 4.6995745211449786e-05, "loss": 2.2689, "step": 6224 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.699473912010004e-05, "loss": 2.2752, "step": 6225 }, { "epoch": 0.83, "grad_norm": 0.326171875, "learning_rate": 4.69937328710871e-05, "loss": 2.2611, "step": 6226 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.699272646441821e-05, "loss": 2.2827, "step": 6227 }, { "epoch": 0.83, "grad_norm": 0.283203125, "learning_rate": 4.699171990010056e-05, "loss": 2.2943, "step": 6228 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.699071317814138e-05, "loss": 2.246, "step": 6229 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.6989706298547873e-05, "loss": 2.266, "step": 6230 }, { "epoch": 0.83, "grad_norm": 0.302734375, "learning_rate": 4.6988699261327266e-05, "loss": 2.2635, "step": 6231 }, { "epoch": 0.83, "grad_norm": 0.3046875, "learning_rate": 4.6987692066486776e-05, "loss": 2.2611, "step": 6232 }, { "epoch": 0.83, "grad_norm": 0.29296875, "learning_rate": 4.698668471403362e-05, "loss": 2.2619, "step": 6233 }, { "epoch": 0.83, "grad_norm": 0.314453125, "learning_rate": 4.698567720397502e-05, "loss": 2.2561, "step": 6234 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.69846695363182e-05, "loss": 2.2793, "step": 6235 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.698366171107038e-05, "loss": 2.2137, "step": 6236 }, { "epoch": 0.83, "grad_norm": 0.291015625, "learning_rate": 4.698265372823879e-05, "loss": 2.2458, "step": 6237 }, { "epoch": 0.83, "grad_norm": 0.3125, "learning_rate": 4.698164558783065e-05, "loss": 2.2952, "step": 6238 }, { "epoch": 0.83, "grad_norm": 0.27734375, "learning_rate": 4.698063728985318e-05, "loss": 2.2693, "step": 6239 }, { "epoch": 0.83, "grad_norm": 0.318359375, "learning_rate": 4.6979628834313626e-05, "loss": 2.2643, "step": 6240 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.697862022121921e-05, "loss": 2.2533, "step": 6241 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.697761145057714e-05, "loss": 2.2274, "step": 6242 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.697660252239468e-05, "loss": 2.2374, "step": 6243 }, { "epoch": 0.83, "grad_norm": 0.306640625, "learning_rate": 4.6975593436679046e-05, "loss": 2.2784, "step": 6244 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.6974584193437475e-05, "loss": 2.2853, "step": 6245 }, { "epoch": 0.83, "grad_norm": 0.318359375, "learning_rate": 4.697357479267719e-05, "loss": 2.268, "step": 6246 }, { "epoch": 0.83, "grad_norm": 0.279296875, "learning_rate": 4.697256523440545e-05, "loss": 2.2419, "step": 6247 }, { "epoch": 0.83, "grad_norm": 0.287109375, "learning_rate": 4.697155551862947e-05, "loss": 2.2746, "step": 6248 }, { "epoch": 0.83, "grad_norm": 0.30078125, "learning_rate": 4.69705456453565e-05, "loss": 2.2814, "step": 6249 }, { "epoch": 0.83, "grad_norm": 0.298828125, "learning_rate": 4.696953561459377e-05, "loss": 2.2756, "step": 6250 }, { "epoch": 0.83, "grad_norm": 0.3046875, "learning_rate": 4.696852542634852e-05, "loss": 2.2897, "step": 6251 }, { "epoch": 0.83, "grad_norm": 0.298828125, "learning_rate": 4.696751508062801e-05, "loss": 2.2371, "step": 6252 }, { "epoch": 0.83, "grad_norm": 0.29296875, "learning_rate": 4.6966504577439466e-05, "loss": 2.2472, "step": 6253 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.6965493916790126e-05, "loss": 2.2703, "step": 6254 }, { "epoch": 0.83, "grad_norm": 0.294921875, "learning_rate": 4.6964483098687254e-05, "loss": 2.2674, "step": 6255 }, { "epoch": 0.83, "grad_norm": 0.28125, "learning_rate": 4.6963472123138074e-05, "loss": 2.2415, "step": 6256 }, { "epoch": 0.83, "grad_norm": 0.2890625, "learning_rate": 4.696246099014985e-05, "loss": 2.267, "step": 6257 }, { "epoch": 0.83, "grad_norm": 0.3203125, "learning_rate": 4.696144969972982e-05, "loss": 2.2547, "step": 6258 }, { "epoch": 0.83, "grad_norm": 0.296875, "learning_rate": 4.696043825188523e-05, "loss": 2.2511, "step": 6259 }, { "epoch": 0.84, "grad_norm": 0.3046875, "learning_rate": 4.695942664662335e-05, "loss": 2.2735, "step": 6260 }, { "epoch": 0.84, "grad_norm": 0.2890625, "learning_rate": 4.695841488395141e-05, "loss": 2.2821, "step": 6261 }, { "epoch": 0.84, "grad_norm": 0.318359375, "learning_rate": 4.695740296387668e-05, "loss": 2.2993, "step": 6262 }, { "epoch": 0.84, "grad_norm": 0.27734375, "learning_rate": 4.6956390886406395e-05, "loss": 2.2737, "step": 6263 }, { "epoch": 0.84, "grad_norm": 0.3203125, "learning_rate": 4.695537865154783e-05, "loss": 2.2225, "step": 6264 }, { "epoch": 0.84, "grad_norm": 0.283203125, "learning_rate": 4.695436625930822e-05, "loss": 2.2878, "step": 6265 }, { "epoch": 0.84, "grad_norm": 0.27734375, "learning_rate": 4.695335370969484e-05, "loss": 2.2813, "step": 6266 }, { "epoch": 0.84, "grad_norm": 0.310546875, "learning_rate": 4.695234100271493e-05, "loss": 2.236, "step": 6267 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.695132813837577e-05, "loss": 2.2473, "step": 6268 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.695031511668461e-05, "loss": 2.2625, "step": 6269 }, { "epoch": 0.84, "grad_norm": 0.283203125, "learning_rate": 4.6949301937648705e-05, "loss": 2.2587, "step": 6270 }, { "epoch": 0.84, "grad_norm": 0.298828125, "learning_rate": 4.694828860127532e-05, "loss": 2.2769, "step": 6271 }, { "epoch": 0.84, "grad_norm": 0.28515625, "learning_rate": 4.6947275107571736e-05, "loss": 2.2639, "step": 6272 }, { "epoch": 0.84, "grad_norm": 0.29296875, "learning_rate": 4.6946261456545203e-05, "loss": 2.2536, "step": 6273 }, { "epoch": 0.84, "grad_norm": 0.287109375, "learning_rate": 4.694524764820298e-05, "loss": 2.3149, "step": 6274 }, { "epoch": 0.84, "grad_norm": 0.306640625, "learning_rate": 4.6944233682552356e-05, "loss": 2.2948, "step": 6275 }, { "epoch": 0.84, "grad_norm": 0.29296875, "learning_rate": 4.6943219559600584e-05, "loss": 2.3064, "step": 6276 }, { "epoch": 0.84, "grad_norm": 0.2890625, "learning_rate": 4.6942205279354935e-05, "loss": 2.286, "step": 6277 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.694119084182268e-05, "loss": 2.2625, "step": 6278 }, { "epoch": 0.84, "grad_norm": 0.279296875, "learning_rate": 4.6940176247011093e-05, "loss": 2.2726, "step": 6279 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.693916149492744e-05, "loss": 2.2895, "step": 6280 }, { "epoch": 0.84, "grad_norm": 0.306640625, "learning_rate": 4.6938146585579006e-05, "loss": 2.28, "step": 6281 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.693713151897306e-05, "loss": 2.3028, "step": 6282 }, { "epoch": 0.84, "grad_norm": 0.283203125, "learning_rate": 4.693611629511688e-05, "loss": 2.2602, "step": 6283 }, { "epoch": 0.84, "grad_norm": 0.302734375, "learning_rate": 4.693510091401774e-05, "loss": 2.2578, "step": 6284 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.6934085375682925e-05, "loss": 2.261, "step": 6285 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.69330696801197e-05, "loss": 2.1962, "step": 6286 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.693205382733537e-05, "loss": 2.256, "step": 6287 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.693103781733719e-05, "loss": 2.258, "step": 6288 }, { "epoch": 0.84, "grad_norm": 0.29296875, "learning_rate": 4.693002165013246e-05, "loss": 2.2362, "step": 6289 }, { "epoch": 0.84, "grad_norm": 0.2890625, "learning_rate": 4.6929005325728456e-05, "loss": 2.2785, "step": 6290 }, { "epoch": 0.84, "grad_norm": 0.30859375, "learning_rate": 4.692798884413248e-05, "loss": 2.287, "step": 6291 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.692697220535179e-05, "loss": 2.2398, "step": 6292 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.6925955409393705e-05, "loss": 2.2405, "step": 6293 }, { "epoch": 0.84, "grad_norm": 0.28515625, "learning_rate": 4.692493845626549e-05, "loss": 2.2248, "step": 6294 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.692392134597444e-05, "loss": 2.2565, "step": 6295 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.692290407852785e-05, "loss": 2.2636, "step": 6296 }, { "epoch": 0.84, "grad_norm": 0.2734375, "learning_rate": 4.6921886653933014e-05, "loss": 2.2705, "step": 6297 }, { "epoch": 0.84, "grad_norm": 0.298828125, "learning_rate": 4.6920869072197214e-05, "loss": 2.2517, "step": 6298 }, { "epoch": 0.84, "grad_norm": 0.310546875, "learning_rate": 4.691985133332776e-05, "loss": 2.2404, "step": 6299 }, { "epoch": 0.84, "grad_norm": 0.314453125, "learning_rate": 4.691883343733193e-05, "loss": 2.2431, "step": 6300 }, { "epoch": 0.84, "grad_norm": 0.306640625, "learning_rate": 4.691781538421703e-05, "loss": 2.2431, "step": 6301 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.691679717399037e-05, "loss": 2.2475, "step": 6302 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.691577880665922e-05, "loss": 2.2423, "step": 6303 }, { "epoch": 0.84, "grad_norm": 0.275390625, "learning_rate": 4.691476028223091e-05, "loss": 2.2671, "step": 6304 }, { "epoch": 0.84, "grad_norm": 0.287109375, "learning_rate": 4.6913741600712713e-05, "loss": 2.2661, "step": 6305 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.6912722762111954e-05, "loss": 2.2616, "step": 6306 }, { "epoch": 0.84, "grad_norm": 0.29296875, "learning_rate": 4.691170376643593e-05, "loss": 2.2466, "step": 6307 }, { "epoch": 0.84, "grad_norm": 0.3046875, "learning_rate": 4.691068461369193e-05, "loss": 2.2595, "step": 6308 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.6909665303887287e-05, "loss": 2.2961, "step": 6309 }, { "epoch": 0.84, "grad_norm": 0.3046875, "learning_rate": 4.690864583702929e-05, "loss": 2.2627, "step": 6310 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.690762621312524e-05, "loss": 2.25, "step": 6311 }, { "epoch": 0.84, "grad_norm": 0.298828125, "learning_rate": 4.690660643218246e-05, "loss": 2.2609, "step": 6312 }, { "epoch": 0.84, "grad_norm": 0.283203125, "learning_rate": 4.690558649420825e-05, "loss": 2.2723, "step": 6313 }, { "epoch": 0.84, "grad_norm": 0.296875, "learning_rate": 4.690456639920994e-05, "loss": 2.2595, "step": 6314 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.690354614719482e-05, "loss": 2.2528, "step": 6315 }, { "epoch": 0.84, "grad_norm": 0.287109375, "learning_rate": 4.690252573817021e-05, "loss": 2.254, "step": 6316 }, { "epoch": 0.84, "grad_norm": 0.3203125, "learning_rate": 4.690150517214343e-05, "loss": 2.2667, "step": 6317 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.69004844491218e-05, "loss": 2.2482, "step": 6318 }, { "epoch": 0.84, "grad_norm": 0.287109375, "learning_rate": 4.6899463569112614e-05, "loss": 2.2669, "step": 6319 }, { "epoch": 0.84, "grad_norm": 0.2890625, "learning_rate": 4.6898442532123214e-05, "loss": 2.2566, "step": 6320 }, { "epoch": 0.84, "grad_norm": 0.3046875, "learning_rate": 4.689742133816091e-05, "loss": 2.2428, "step": 6321 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.689639998723302e-05, "loss": 2.2637, "step": 6322 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.689537847934687e-05, "loss": 2.2808, "step": 6323 }, { "epoch": 0.84, "grad_norm": 0.3125, "learning_rate": 4.689435681450978e-05, "loss": 2.2716, "step": 6324 }, { "epoch": 0.84, "grad_norm": 0.28125, "learning_rate": 4.689333499272907e-05, "loss": 2.2622, "step": 6325 }, { "epoch": 0.84, "grad_norm": 0.3046875, "learning_rate": 4.6892313014012074e-05, "loss": 2.2824, "step": 6326 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.6891290878366114e-05, "loss": 2.2632, "step": 6327 }, { "epoch": 0.84, "grad_norm": 0.296875, "learning_rate": 4.68902685857985e-05, "loss": 2.2581, "step": 6328 }, { "epoch": 0.84, "grad_norm": 0.28515625, "learning_rate": 4.688924613631659e-05, "loss": 2.3043, "step": 6329 }, { "epoch": 0.84, "grad_norm": 0.29296875, "learning_rate": 4.688822352992769e-05, "loss": 2.2659, "step": 6330 }, { "epoch": 0.84, "grad_norm": 0.30078125, "learning_rate": 4.688720076663914e-05, "loss": 2.2643, "step": 6331 }, { "epoch": 0.84, "grad_norm": 0.296875, "learning_rate": 4.688617784645827e-05, "loss": 2.2344, "step": 6332 }, { "epoch": 0.84, "grad_norm": 0.291015625, "learning_rate": 4.688515476939241e-05, "loss": 2.2818, "step": 6333 }, { "epoch": 0.84, "grad_norm": 0.294921875, "learning_rate": 4.6884131535448895e-05, "loss": 2.2769, "step": 6334 }, { "epoch": 0.85, "grad_norm": 0.279296875, "learning_rate": 4.6883108144635064e-05, "loss": 2.2972, "step": 6335 }, { "epoch": 0.85, "grad_norm": 0.298828125, "learning_rate": 4.688208459695824e-05, "loss": 2.2887, "step": 6336 }, { "epoch": 0.85, "grad_norm": 0.302734375, "learning_rate": 4.688106089242578e-05, "loss": 2.2484, "step": 6337 }, { "epoch": 0.85, "grad_norm": 0.318359375, "learning_rate": 4.688003703104501e-05, "loss": 2.2461, "step": 6338 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.687901301282327e-05, "loss": 2.2602, "step": 6339 }, { "epoch": 0.85, "grad_norm": 0.294921875, "learning_rate": 4.6877988837767906e-05, "loss": 2.2836, "step": 6340 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.687696450588625e-05, "loss": 2.2299, "step": 6341 }, { "epoch": 0.85, "grad_norm": 0.302734375, "learning_rate": 4.687594001718565e-05, "loss": 2.2828, "step": 6342 }, { "epoch": 0.85, "grad_norm": 0.283203125, "learning_rate": 4.6874915371673444e-05, "loss": 2.2541, "step": 6343 }, { "epoch": 0.85, "grad_norm": 0.30078125, "learning_rate": 4.6873890569356985e-05, "loss": 2.2542, "step": 6344 }, { "epoch": 0.85, "grad_norm": 0.3125, "learning_rate": 4.687286561024362e-05, "loss": 2.2769, "step": 6345 }, { "epoch": 0.85, "grad_norm": 0.2890625, "learning_rate": 4.687184049434069e-05, "loss": 2.2732, "step": 6346 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.687081522165554e-05, "loss": 2.3001, "step": 6347 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.6869789792195526e-05, "loss": 2.2696, "step": 6348 }, { "epoch": 0.85, "grad_norm": 0.546875, "learning_rate": 4.686876420596801e-05, "loss": 2.2771, "step": 6349 }, { "epoch": 0.85, "grad_norm": 0.28515625, "learning_rate": 4.686773846298032e-05, "loss": 2.2914, "step": 6350 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.686671256323982e-05, "loss": 2.2547, "step": 6351 }, { "epoch": 0.85, "grad_norm": 0.302734375, "learning_rate": 4.686568650675386e-05, "loss": 2.2549, "step": 6352 }, { "epoch": 0.85, "grad_norm": 0.306640625, "learning_rate": 4.6864660293529804e-05, "loss": 2.2796, "step": 6353 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.6863633923575e-05, "loss": 2.2594, "step": 6354 }, { "epoch": 0.85, "grad_norm": 0.287109375, "learning_rate": 4.686260739689682e-05, "loss": 2.2292, "step": 6355 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.68615807135026e-05, "loss": 2.2706, "step": 6356 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.68605538733997e-05, "loss": 2.2776, "step": 6357 }, { "epoch": 0.85, "grad_norm": 0.2890625, "learning_rate": 4.685952687659551e-05, "loss": 2.2585, "step": 6358 }, { "epoch": 0.85, "grad_norm": 0.291015625, "learning_rate": 4.685849972309736e-05, "loss": 2.2569, "step": 6359 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.685747241291263e-05, "loss": 2.2219, "step": 6360 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.6856444946048674e-05, "loss": 2.2649, "step": 6361 }, { "epoch": 0.85, "grad_norm": 0.328125, "learning_rate": 4.685541732251286e-05, "loss": 2.2664, "step": 6362 }, { "epoch": 0.85, "grad_norm": 0.28515625, "learning_rate": 4.6854389542312566e-05, "loss": 2.2364, "step": 6363 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.685336160545514e-05, "loss": 2.3008, "step": 6364 }, { "epoch": 0.85, "grad_norm": 0.310546875, "learning_rate": 4.685233351194796e-05, "loss": 2.2981, "step": 6365 }, { "epoch": 0.85, "grad_norm": 0.306640625, "learning_rate": 4.6851305261798406e-05, "loss": 2.2685, "step": 6366 }, { "epoch": 0.85, "grad_norm": 0.314453125, "learning_rate": 4.6850276855013836e-05, "loss": 2.264, "step": 6367 }, { "epoch": 0.85, "grad_norm": 0.310546875, "learning_rate": 4.684924829160162e-05, "loss": 2.2611, "step": 6368 }, { "epoch": 0.85, "grad_norm": 0.30078125, "learning_rate": 4.684821957156914e-05, "loss": 2.2552, "step": 6369 }, { "epoch": 0.85, "grad_norm": 0.279296875, "learning_rate": 4.684719069492377e-05, "loss": 2.2701, "step": 6370 }, { "epoch": 0.85, "grad_norm": 0.287109375, "learning_rate": 4.6846161661672864e-05, "loss": 2.2422, "step": 6371 }, { "epoch": 0.85, "grad_norm": 0.287109375, "learning_rate": 4.6845132471823824e-05, "loss": 2.2548, "step": 6372 }, { "epoch": 0.85, "grad_norm": 0.298828125, "learning_rate": 4.684410312538402e-05, "loss": 2.2629, "step": 6373 }, { "epoch": 0.85, "grad_norm": 0.30078125, "learning_rate": 4.684307362236083e-05, "loss": 2.2722, "step": 6374 }, { "epoch": 0.85, "grad_norm": 0.31640625, "learning_rate": 4.684204396276163e-05, "loss": 2.2587, "step": 6375 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.68410141465938e-05, "loss": 2.2508, "step": 6376 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.6839984173864736e-05, "loss": 2.2613, "step": 6377 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.68389540445818e-05, "loss": 2.265, "step": 6378 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.6837923758752386e-05, "loss": 2.2478, "step": 6379 }, { "epoch": 0.85, "grad_norm": 0.298828125, "learning_rate": 4.683689331638389e-05, "loss": 2.25, "step": 6380 }, { "epoch": 0.85, "grad_norm": 0.3125, "learning_rate": 4.683586271748368e-05, "loss": 2.2705, "step": 6381 }, { "epoch": 0.85, "grad_norm": 0.310546875, "learning_rate": 4.683483196205915e-05, "loss": 2.2615, "step": 6382 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.683380105011771e-05, "loss": 2.3134, "step": 6383 }, { "epoch": 0.85, "grad_norm": 0.287109375, "learning_rate": 4.683276998166671e-05, "loss": 2.2896, "step": 6384 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.683173875671357e-05, "loss": 2.29, "step": 6385 }, { "epoch": 0.85, "grad_norm": 0.28125, "learning_rate": 4.683070737526567e-05, "loss": 2.2275, "step": 6386 }, { "epoch": 0.85, "grad_norm": 0.294921875, "learning_rate": 4.682967583733041e-05, "loss": 2.2425, "step": 6387 }, { "epoch": 0.85, "grad_norm": 0.294921875, "learning_rate": 4.682864414291518e-05, "loss": 2.2687, "step": 6388 }, { "epoch": 0.85, "grad_norm": 0.30078125, "learning_rate": 4.682761229202738e-05, "loss": 2.2688, "step": 6389 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.68265802846744e-05, "loss": 2.2696, "step": 6390 }, { "epoch": 0.85, "grad_norm": 0.28125, "learning_rate": 4.682554812086364e-05, "loss": 2.2493, "step": 6391 }, { "epoch": 0.85, "grad_norm": 0.28515625, "learning_rate": 4.68245158006025e-05, "loss": 2.2665, "step": 6392 }, { "epoch": 0.85, "grad_norm": 0.298828125, "learning_rate": 4.6823483323898375e-05, "loss": 2.2714, "step": 6393 }, { "epoch": 0.85, "grad_norm": 0.3125, "learning_rate": 4.682245069075868e-05, "loss": 2.2624, "step": 6394 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.68214179011908e-05, "loss": 2.2617, "step": 6395 }, { "epoch": 0.85, "grad_norm": 0.31640625, "learning_rate": 4.6820384955202144e-05, "loss": 2.2853, "step": 6396 }, { "epoch": 0.85, "grad_norm": 0.32421875, "learning_rate": 4.681935185280012e-05, "loss": 2.2792, "step": 6397 }, { "epoch": 0.85, "grad_norm": 0.283203125, "learning_rate": 4.681831859399214e-05, "loss": 2.2498, "step": 6398 }, { "epoch": 0.85, "grad_norm": 0.294921875, "learning_rate": 4.681728517878559e-05, "loss": 2.2591, "step": 6399 }, { "epoch": 0.85, "grad_norm": 0.2890625, "learning_rate": 4.68162516071879e-05, "loss": 2.2853, "step": 6400 }, { "epoch": 0.85, "grad_norm": 0.275390625, "learning_rate": 4.681521787920646e-05, "loss": 2.291, "step": 6401 }, { "epoch": 0.85, "grad_norm": 0.279296875, "learning_rate": 4.681418399484869e-05, "loss": 2.2776, "step": 6402 }, { "epoch": 0.85, "grad_norm": 0.296875, "learning_rate": 4.681314995412201e-05, "loss": 2.2629, "step": 6403 }, { "epoch": 0.85, "grad_norm": 0.27734375, "learning_rate": 4.681211575703381e-05, "loss": 2.2516, "step": 6404 }, { "epoch": 0.85, "grad_norm": 0.3046875, "learning_rate": 4.6811081403591526e-05, "loss": 2.2599, "step": 6405 }, { "epoch": 0.85, "grad_norm": 0.2890625, "learning_rate": 4.6810046893802556e-05, "loss": 2.248, "step": 6406 }, { "epoch": 0.85, "grad_norm": 0.30859375, "learning_rate": 4.680901222767432e-05, "loss": 2.247, "step": 6407 }, { "epoch": 0.85, "grad_norm": 0.2890625, "learning_rate": 4.680797740521424e-05, "loss": 2.2739, "step": 6408 }, { "epoch": 0.85, "grad_norm": 0.29296875, "learning_rate": 4.6806942426429734e-05, "loss": 2.2676, "step": 6409 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.680590729132821e-05, "loss": 2.272, "step": 6410 }, { "epoch": 0.86, "grad_norm": 0.29296875, "learning_rate": 4.68048719999171e-05, "loss": 2.2173, "step": 6411 }, { "epoch": 0.86, "grad_norm": 0.28515625, "learning_rate": 4.680383655220382e-05, "loss": 2.2859, "step": 6412 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.680280094819579e-05, "loss": 2.2744, "step": 6413 }, { "epoch": 0.86, "grad_norm": 0.27734375, "learning_rate": 4.6801765187900436e-05, "loss": 2.2683, "step": 6414 }, { "epoch": 0.86, "grad_norm": 0.283203125, "learning_rate": 4.6800729271325185e-05, "loss": 2.2864, "step": 6415 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.679969319847746e-05, "loss": 2.2577, "step": 6416 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.679865696936469e-05, "loss": 2.2453, "step": 6417 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.6797620583994294e-05, "loss": 2.2489, "step": 6418 }, { "epoch": 0.86, "grad_norm": 0.287109375, "learning_rate": 4.679658404237371e-05, "loss": 2.2609, "step": 6419 }, { "epoch": 0.86, "grad_norm": 0.287109375, "learning_rate": 4.6795547344510375e-05, "loss": 2.2542, "step": 6420 }, { "epoch": 0.86, "grad_norm": 0.28125, "learning_rate": 4.67945104904117e-05, "loss": 2.2744, "step": 6421 }, { "epoch": 0.86, "grad_norm": 0.27734375, "learning_rate": 4.679347348008514e-05, "loss": 2.244, "step": 6422 }, { "epoch": 0.86, "grad_norm": 0.283203125, "learning_rate": 4.679243631353811e-05, "loss": 2.2605, "step": 6423 }, { "epoch": 0.86, "grad_norm": 0.28515625, "learning_rate": 4.679139899077806e-05, "loss": 2.2359, "step": 6424 }, { "epoch": 0.86, "grad_norm": 0.287109375, "learning_rate": 4.679036151181241e-05, "loss": 2.2783, "step": 6425 }, { "epoch": 0.86, "grad_norm": 0.3125, "learning_rate": 4.678932387664861e-05, "loss": 2.2307, "step": 6426 }, { "epoch": 0.86, "grad_norm": 0.326171875, "learning_rate": 4.678828608529409e-05, "loss": 2.2636, "step": 6427 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.67872481377563e-05, "loss": 2.2583, "step": 6428 }, { "epoch": 0.86, "grad_norm": 0.28515625, "learning_rate": 4.6786210034042664e-05, "loss": 2.2854, "step": 6429 }, { "epoch": 0.86, "grad_norm": 0.306640625, "learning_rate": 4.678517177416063e-05, "loss": 2.2716, "step": 6430 }, { "epoch": 0.86, "grad_norm": 0.30078125, "learning_rate": 4.6784133358117644e-05, "loss": 2.318, "step": 6431 }, { "epoch": 0.86, "grad_norm": 0.314453125, "learning_rate": 4.678309478592115e-05, "loss": 2.258, "step": 6432 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.678205605757859e-05, "loss": 2.241, "step": 6433 }, { "epoch": 0.86, "grad_norm": 0.29296875, "learning_rate": 4.678101717309741e-05, "loss": 2.2783, "step": 6434 }, { "epoch": 0.86, "grad_norm": 0.279296875, "learning_rate": 4.6779978132485056e-05, "loss": 2.265, "step": 6435 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.6778938935748973e-05, "loss": 2.2288, "step": 6436 }, { "epoch": 0.86, "grad_norm": 0.294921875, "learning_rate": 4.677789958289662e-05, "loss": 2.261, "step": 6437 }, { "epoch": 0.86, "grad_norm": 0.294921875, "learning_rate": 4.677686007393544e-05, "loss": 2.2442, "step": 6438 }, { "epoch": 0.86, "grad_norm": 0.3125, "learning_rate": 4.677582040887289e-05, "loss": 2.2459, "step": 6439 }, { "epoch": 0.86, "grad_norm": 0.31640625, "learning_rate": 4.6774780587716416e-05, "loss": 2.259, "step": 6440 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.677374061047347e-05, "loss": 2.2708, "step": 6441 }, { "epoch": 0.86, "grad_norm": 0.3046875, "learning_rate": 4.677270047715152e-05, "loss": 2.2543, "step": 6442 }, { "epoch": 0.86, "grad_norm": 0.287109375, "learning_rate": 4.6771660187758e-05, "loss": 2.251, "step": 6443 }, { "epoch": 0.86, "grad_norm": 0.30078125, "learning_rate": 4.6770619742300394e-05, "loss": 2.275, "step": 6444 }, { "epoch": 0.86, "grad_norm": 0.3046875, "learning_rate": 4.676957914078614e-05, "loss": 2.275, "step": 6445 }, { "epoch": 0.86, "grad_norm": 0.341796875, "learning_rate": 4.6768538383222705e-05, "loss": 2.26, "step": 6446 }, { "epoch": 0.86, "grad_norm": 0.30859375, "learning_rate": 4.676749746961755e-05, "loss": 2.2594, "step": 6447 }, { "epoch": 0.86, "grad_norm": 0.283203125, "learning_rate": 4.676645639997813e-05, "loss": 2.2683, "step": 6448 }, { "epoch": 0.86, "grad_norm": 0.294921875, "learning_rate": 4.676541517431191e-05, "loss": 2.2531, "step": 6449 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.6764373792626356e-05, "loss": 2.2504, "step": 6450 }, { "epoch": 0.86, "grad_norm": 0.283203125, "learning_rate": 4.676333225492894e-05, "loss": 2.2695, "step": 6451 }, { "epoch": 0.86, "grad_norm": 0.310546875, "learning_rate": 4.676229056122712e-05, "loss": 2.2524, "step": 6452 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.6761248711528364e-05, "loss": 2.3161, "step": 6453 }, { "epoch": 0.86, "grad_norm": 0.27734375, "learning_rate": 4.676020670584013e-05, "loss": 2.2613, "step": 6454 }, { "epoch": 0.86, "grad_norm": 0.333984375, "learning_rate": 4.6759164544169906e-05, "loss": 2.3004, "step": 6455 }, { "epoch": 0.86, "grad_norm": 0.30078125, "learning_rate": 4.675812222652515e-05, "loss": 2.2542, "step": 6456 }, { "epoch": 0.86, "grad_norm": 0.30078125, "learning_rate": 4.675707975291333e-05, "loss": 2.2805, "step": 6457 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.675603712334194e-05, "loss": 2.2674, "step": 6458 }, { "epoch": 0.86, "grad_norm": 0.2890625, "learning_rate": 4.6754994337818436e-05, "loss": 2.2509, "step": 6459 }, { "epoch": 0.86, "grad_norm": 0.279296875, "learning_rate": 4.675395139635029e-05, "loss": 2.2691, "step": 6460 }, { "epoch": 0.86, "grad_norm": 0.30859375, "learning_rate": 4.675290829894499e-05, "loss": 2.2791, "step": 6461 }, { "epoch": 0.86, "grad_norm": 0.314453125, "learning_rate": 4.675186504561001e-05, "loss": 2.2573, "step": 6462 }, { "epoch": 0.86, "grad_norm": 0.30859375, "learning_rate": 4.675082163635282e-05, "loss": 2.236, "step": 6463 }, { "epoch": 0.86, "grad_norm": 0.310546875, "learning_rate": 4.674977807118091e-05, "loss": 2.2471, "step": 6464 }, { "epoch": 0.86, "grad_norm": 0.28515625, "learning_rate": 4.674873435010175e-05, "loss": 2.2581, "step": 6465 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.674769047312284e-05, "loss": 2.2601, "step": 6466 }, { "epoch": 0.86, "grad_norm": 0.3125, "learning_rate": 4.674664644025164e-05, "loss": 2.2594, "step": 6467 }, { "epoch": 0.86, "grad_norm": 0.298828125, "learning_rate": 4.674560225149564e-05, "loss": 2.246, "step": 6468 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.674455790686234e-05, "loss": 2.2537, "step": 6469 }, { "epoch": 0.86, "grad_norm": 0.31640625, "learning_rate": 4.674351340635921e-05, "loss": 2.2534, "step": 6470 }, { "epoch": 0.86, "grad_norm": 0.294921875, "learning_rate": 4.6742468749993743e-05, "loss": 2.2733, "step": 6471 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.6741423937773434e-05, "loss": 2.2425, "step": 6472 }, { "epoch": 0.86, "grad_norm": 0.310546875, "learning_rate": 4.674037896970576e-05, "loss": 2.2469, "step": 6473 }, { "epoch": 0.86, "grad_norm": 0.28515625, "learning_rate": 4.673933384579821e-05, "loss": 2.2745, "step": 6474 }, { "epoch": 0.86, "grad_norm": 0.306640625, "learning_rate": 4.673828856605829e-05, "loss": 2.2452, "step": 6475 }, { "epoch": 0.86, "grad_norm": 0.2734375, "learning_rate": 4.673724313049348e-05, "loss": 2.2771, "step": 6476 }, { "epoch": 0.86, "grad_norm": 0.291015625, "learning_rate": 4.6736197539111284e-05, "loss": 2.275, "step": 6477 }, { "epoch": 0.86, "grad_norm": 0.27734375, "learning_rate": 4.673515179191919e-05, "loss": 2.2732, "step": 6478 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.6734105888924694e-05, "loss": 2.2577, "step": 6479 }, { "epoch": 0.86, "grad_norm": 0.3046875, "learning_rate": 4.673305983013531e-05, "loss": 2.2739, "step": 6480 }, { "epoch": 0.86, "grad_norm": 0.302734375, "learning_rate": 4.67320136155585e-05, "loss": 2.266, "step": 6481 }, { "epoch": 0.86, "grad_norm": 0.314453125, "learning_rate": 4.67309672452018e-05, "loss": 2.2443, "step": 6482 }, { "epoch": 0.86, "grad_norm": 0.296875, "learning_rate": 4.672992071907269e-05, "loss": 2.2812, "step": 6483 }, { "epoch": 0.86, "grad_norm": 0.30078125, "learning_rate": 4.672887403717868e-05, "loss": 2.2905, "step": 6484 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.6727827199527275e-05, "loss": 2.2479, "step": 6485 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.6726780206125976e-05, "loss": 2.2448, "step": 6486 }, { "epoch": 0.87, "grad_norm": 0.31640625, "learning_rate": 4.672573305698228e-05, "loss": 2.2316, "step": 6487 }, { "epoch": 0.87, "grad_norm": 0.3125, "learning_rate": 4.6724685752103704e-05, "loss": 2.253, "step": 6488 }, { "epoch": 0.87, "grad_norm": 0.30078125, "learning_rate": 4.672363829149775e-05, "loss": 2.2422, "step": 6489 }, { "epoch": 0.87, "grad_norm": 0.3046875, "learning_rate": 4.6722590675171926e-05, "loss": 2.2721, "step": 6490 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.672154290313374e-05, "loss": 2.2901, "step": 6491 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.672049497539071e-05, "loss": 2.2751, "step": 6492 }, { "epoch": 0.87, "grad_norm": 0.30859375, "learning_rate": 4.671944689195035e-05, "loss": 2.2679, "step": 6493 }, { "epoch": 0.87, "grad_norm": 0.310546875, "learning_rate": 4.6718398652820155e-05, "loss": 2.269, "step": 6494 }, { "epoch": 0.87, "grad_norm": 0.298828125, "learning_rate": 4.671735025800765e-05, "loss": 2.2407, "step": 6495 }, { "epoch": 0.87, "grad_norm": 0.30859375, "learning_rate": 4.671630170752036e-05, "loss": 2.2866, "step": 6496 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.671525300136579e-05, "loss": 2.241, "step": 6497 }, { "epoch": 0.87, "grad_norm": 0.302734375, "learning_rate": 4.671420413955146e-05, "loss": 2.2929, "step": 6498 }, { "epoch": 0.87, "grad_norm": 0.30078125, "learning_rate": 4.671315512208489e-05, "loss": 2.2598, "step": 6499 }, { "epoch": 0.87, "grad_norm": 0.298828125, "learning_rate": 4.671210594897359e-05, "loss": 2.2774, "step": 6500 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.671105662022509e-05, "loss": 2.2405, "step": 6501 }, { "epoch": 0.87, "grad_norm": 0.35546875, "learning_rate": 4.671000713584691e-05, "loss": 2.2508, "step": 6502 }, { "epoch": 0.87, "grad_norm": 0.279296875, "learning_rate": 4.670895749584658e-05, "loss": 2.1894, "step": 6503 }, { "epoch": 0.87, "grad_norm": 0.30078125, "learning_rate": 4.6707907700231614e-05, "loss": 2.2281, "step": 6504 }, { "epoch": 0.87, "grad_norm": 0.30078125, "learning_rate": 4.670685774900954e-05, "loss": 2.2583, "step": 6505 }, { "epoch": 0.87, "grad_norm": 0.294921875, "learning_rate": 4.670580764218788e-05, "loss": 2.2543, "step": 6506 }, { "epoch": 0.87, "grad_norm": 0.28125, "learning_rate": 4.6704757379774164e-05, "loss": 2.2772, "step": 6507 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.670370696177593e-05, "loss": 2.2752, "step": 6508 }, { "epoch": 0.87, "grad_norm": 0.2890625, "learning_rate": 4.670265638820069e-05, "loss": 2.239, "step": 6509 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.6701605659055994e-05, "loss": 2.2767, "step": 6510 }, { "epoch": 0.87, "grad_norm": 0.291015625, "learning_rate": 4.670055477434936e-05, "loss": 2.2512, "step": 6511 }, { "epoch": 0.87, "grad_norm": 0.291015625, "learning_rate": 4.6699503734088324e-05, "loss": 2.2395, "step": 6512 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.669845253828042e-05, "loss": 2.2483, "step": 6513 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.66974011869332e-05, "loss": 2.2472, "step": 6514 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.6696349680054174e-05, "loss": 2.2722, "step": 6515 }, { "epoch": 0.87, "grad_norm": 0.283203125, "learning_rate": 4.669529801765089e-05, "loss": 2.2773, "step": 6516 }, { "epoch": 0.87, "grad_norm": 0.279296875, "learning_rate": 4.669424619973089e-05, "loss": 2.2756, "step": 6517 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.6693194226301703e-05, "loss": 2.2459, "step": 6518 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.6692142097370884e-05, "loss": 2.2585, "step": 6519 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.669108981294597e-05, "loss": 2.2619, "step": 6520 }, { "epoch": 0.87, "grad_norm": 0.2890625, "learning_rate": 4.66900373730345e-05, "loss": 2.2758, "step": 6521 }, { "epoch": 0.87, "grad_norm": 0.275390625, "learning_rate": 4.668898477764402e-05, "loss": 2.245, "step": 6522 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.6687932026782075e-05, "loss": 2.2622, "step": 6523 }, { "epoch": 0.87, "grad_norm": 0.279296875, "learning_rate": 4.6686879120456214e-05, "loss": 2.2149, "step": 6524 }, { "epoch": 0.87, "grad_norm": 0.28125, "learning_rate": 4.668582605867399e-05, "loss": 2.2532, "step": 6525 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.668477284144294e-05, "loss": 2.2251, "step": 6526 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.668371946877061e-05, "loss": 2.2799, "step": 6527 }, { "epoch": 0.87, "grad_norm": 0.302734375, "learning_rate": 4.668266594066456e-05, "loss": 2.2576, "step": 6528 }, { "epoch": 0.87, "grad_norm": 0.2734375, "learning_rate": 4.6681612257132345e-05, "loss": 2.2244, "step": 6529 }, { "epoch": 0.87, "grad_norm": 0.306640625, "learning_rate": 4.6680558418181515e-05, "loss": 2.2495, "step": 6530 }, { "epoch": 0.87, "grad_norm": 0.27734375, "learning_rate": 4.667950442381962e-05, "loss": 2.2521, "step": 6531 }, { "epoch": 0.87, "grad_norm": 0.30859375, "learning_rate": 4.667845027405422e-05, "loss": 2.2516, "step": 6532 }, { "epoch": 0.87, "grad_norm": 0.2890625, "learning_rate": 4.667739596889287e-05, "loss": 2.2741, "step": 6533 }, { "epoch": 0.87, "grad_norm": 0.287109375, "learning_rate": 4.667634150834312e-05, "loss": 2.2432, "step": 6534 }, { "epoch": 0.87, "grad_norm": 0.298828125, "learning_rate": 4.667528689241254e-05, "loss": 2.2683, "step": 6535 }, { "epoch": 0.87, "grad_norm": 0.2890625, "learning_rate": 4.6674232121108685e-05, "loss": 2.292, "step": 6536 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.667317719443912e-05, "loss": 2.2836, "step": 6537 }, { "epoch": 0.87, "grad_norm": 0.279296875, "learning_rate": 4.66721221124114e-05, "loss": 2.277, "step": 6538 }, { "epoch": 0.87, "grad_norm": 0.294921875, "learning_rate": 4.667106687503309e-05, "loss": 2.2954, "step": 6539 }, { "epoch": 0.87, "grad_norm": 0.310546875, "learning_rate": 4.667001148231176e-05, "loss": 2.2205, "step": 6540 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.6668955934254966e-05, "loss": 2.2522, "step": 6541 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.666790023087029e-05, "loss": 2.2504, "step": 6542 }, { "epoch": 0.87, "grad_norm": 0.302734375, "learning_rate": 4.6666844372165276e-05, "loss": 2.2605, "step": 6543 }, { "epoch": 0.87, "grad_norm": 0.291015625, "learning_rate": 4.66657883581475e-05, "loss": 2.2135, "step": 6544 }, { "epoch": 0.87, "grad_norm": 0.279296875, "learning_rate": 4.666473218882455e-05, "loss": 2.2617, "step": 6545 }, { "epoch": 0.87, "grad_norm": 0.3046875, "learning_rate": 4.666367586420398e-05, "loss": 2.2173, "step": 6546 }, { "epoch": 0.87, "grad_norm": 0.294921875, "learning_rate": 4.666261938429337e-05, "loss": 2.3168, "step": 6547 }, { "epoch": 0.87, "grad_norm": 0.296875, "learning_rate": 4.666156274910028e-05, "loss": 2.26, "step": 6548 }, { "epoch": 0.87, "grad_norm": 0.302734375, "learning_rate": 4.6660505958632295e-05, "loss": 2.2638, "step": 6549 }, { "epoch": 0.87, "grad_norm": 0.28515625, "learning_rate": 4.665944901289699e-05, "loss": 2.2742, "step": 6550 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.665839191190194e-05, "loss": 2.2174, "step": 6551 }, { "epoch": 0.87, "grad_norm": 0.310546875, "learning_rate": 4.665733465565472e-05, "loss": 2.297, "step": 6552 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.665627724416292e-05, "loss": 2.2718, "step": 6553 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.66552196774341e-05, "loss": 2.2477, "step": 6554 }, { "epoch": 0.87, "grad_norm": 0.28515625, "learning_rate": 4.665416195547585e-05, "loss": 2.2479, "step": 6555 }, { "epoch": 0.87, "grad_norm": 0.29296875, "learning_rate": 4.665310407829576e-05, "loss": 2.2937, "step": 6556 }, { "epoch": 0.87, "grad_norm": 0.27734375, "learning_rate": 4.665204604590141e-05, "loss": 2.2582, "step": 6557 }, { "epoch": 0.87, "grad_norm": 0.294921875, "learning_rate": 4.665098785830038e-05, "loss": 2.2948, "step": 6558 }, { "epoch": 0.87, "grad_norm": 0.2734375, "learning_rate": 4.664992951550025e-05, "loss": 2.2934, "step": 6559 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.664887101750861e-05, "loss": 2.2929, "step": 6560 }, { "epoch": 0.88, "grad_norm": 0.291015625, "learning_rate": 4.664781236433306e-05, "loss": 2.2544, "step": 6561 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.664675355598117e-05, "loss": 2.213, "step": 6562 }, { "epoch": 0.88, "grad_norm": 0.287109375, "learning_rate": 4.6645694592460546e-05, "loss": 2.2802, "step": 6563 }, { "epoch": 0.88, "grad_norm": 0.302734375, "learning_rate": 4.664463547377877e-05, "loss": 2.2641, "step": 6564 }, { "epoch": 0.88, "grad_norm": 0.30078125, "learning_rate": 4.664357619994343e-05, "loss": 2.2587, "step": 6565 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.664251677096213e-05, "loss": 2.2515, "step": 6566 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.664145718684246e-05, "loss": 2.2686, "step": 6567 }, { "epoch": 0.88, "grad_norm": 0.2890625, "learning_rate": 4.664039744759201e-05, "loss": 2.2622, "step": 6568 }, { "epoch": 0.88, "grad_norm": 0.298828125, "learning_rate": 4.663933755321838e-05, "loss": 2.2559, "step": 6569 }, { "epoch": 0.88, "grad_norm": 0.2890625, "learning_rate": 4.663827750372917e-05, "loss": 2.268, "step": 6570 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.663721729913198e-05, "loss": 2.2527, "step": 6571 }, { "epoch": 0.88, "grad_norm": 0.279296875, "learning_rate": 4.663615693943441e-05, "loss": 2.276, "step": 6572 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.663509642464404e-05, "loss": 2.2775, "step": 6573 }, { "epoch": 0.88, "grad_norm": 0.279296875, "learning_rate": 4.6634035754768505e-05, "loss": 2.2501, "step": 6574 }, { "epoch": 0.88, "grad_norm": 0.306640625, "learning_rate": 4.663297492981539e-05, "loss": 2.2811, "step": 6575 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.66319139497923e-05, "loss": 2.293, "step": 6576 }, { "epoch": 0.88, "grad_norm": 0.30078125, "learning_rate": 4.6630852814706846e-05, "loss": 2.2379, "step": 6577 }, { "epoch": 0.88, "grad_norm": 0.28125, "learning_rate": 4.6629791524566624e-05, "loss": 2.2718, "step": 6578 }, { "epoch": 0.88, "grad_norm": 0.283203125, "learning_rate": 4.662873007937925e-05, "loss": 2.243, "step": 6579 }, { "epoch": 0.88, "grad_norm": 0.2734375, "learning_rate": 4.662766847915233e-05, "loss": 2.2679, "step": 6580 }, { "epoch": 0.88, "grad_norm": 0.30078125, "learning_rate": 4.662660672389348e-05, "loss": 2.2402, "step": 6581 }, { "epoch": 0.88, "grad_norm": 0.296875, "learning_rate": 4.66255448136103e-05, "loss": 2.2573, "step": 6582 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.6624482748310415e-05, "loss": 2.268, "step": 6583 }, { "epoch": 0.88, "grad_norm": 0.28125, "learning_rate": 4.662342052800143e-05, "loss": 2.2646, "step": 6584 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.662235815269095e-05, "loss": 2.2955, "step": 6585 }, { "epoch": 0.88, "grad_norm": 0.291015625, "learning_rate": 4.662129562238661e-05, "loss": 2.2404, "step": 6586 }, { "epoch": 0.88, "grad_norm": 0.302734375, "learning_rate": 4.662023293709601e-05, "loss": 2.2941, "step": 6587 }, { "epoch": 0.88, "grad_norm": 0.310546875, "learning_rate": 4.6619170096826776e-05, "loss": 2.2461, "step": 6588 }, { "epoch": 0.88, "grad_norm": 0.287109375, "learning_rate": 4.661810710158653e-05, "loss": 2.2779, "step": 6589 }, { "epoch": 0.88, "grad_norm": 0.310546875, "learning_rate": 4.6617043951382875e-05, "loss": 2.2488, "step": 6590 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.661598064622346e-05, "loss": 2.2409, "step": 6591 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.661491718611588e-05, "loss": 2.2467, "step": 6592 }, { "epoch": 0.88, "grad_norm": 0.283203125, "learning_rate": 4.6613853571067766e-05, "loss": 2.2692, "step": 6593 }, { "epoch": 0.88, "grad_norm": 0.298828125, "learning_rate": 4.6612789801086755e-05, "loss": 2.2948, "step": 6594 }, { "epoch": 0.88, "grad_norm": 0.30859375, "learning_rate": 4.661172587618046e-05, "loss": 2.2734, "step": 6595 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.66106617963565e-05, "loss": 2.2559, "step": 6596 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.660959756162252e-05, "loss": 2.2708, "step": 6597 }, { "epoch": 0.88, "grad_norm": 0.287109375, "learning_rate": 4.660853317198615e-05, "loss": 2.2693, "step": 6598 }, { "epoch": 0.88, "grad_norm": 0.27734375, "learning_rate": 4.6607468627455e-05, "loss": 2.2683, "step": 6599 }, { "epoch": 0.88, "grad_norm": 0.2890625, "learning_rate": 4.660640392803671e-05, "loss": 2.237, "step": 6600 }, { "epoch": 0.88, "grad_norm": 0.298828125, "learning_rate": 4.660533907373892e-05, "loss": 2.2602, "step": 6601 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.660427406456925e-05, "loss": 2.2727, "step": 6602 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.6603208900535346e-05, "loss": 2.2001, "step": 6603 }, { "epoch": 0.88, "grad_norm": 0.306640625, "learning_rate": 4.6602143581644836e-05, "loss": 2.2717, "step": 6604 }, { "epoch": 0.88, "grad_norm": 0.283203125, "learning_rate": 4.660107810790536e-05, "loss": 2.2439, "step": 6605 }, { "epoch": 0.88, "grad_norm": 0.296875, "learning_rate": 4.6600012479324555e-05, "loss": 2.2803, "step": 6606 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.659894669591006e-05, "loss": 2.2586, "step": 6607 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.659788075766951e-05, "loss": 2.2537, "step": 6608 }, { "epoch": 0.88, "grad_norm": 0.310546875, "learning_rate": 4.659681466461056e-05, "loss": 2.2701, "step": 6609 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.659574841674082e-05, "loss": 2.2674, "step": 6610 }, { "epoch": 0.88, "grad_norm": 0.30078125, "learning_rate": 4.659468201406798e-05, "loss": 2.2612, "step": 6611 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.6593615456599644e-05, "loss": 2.2691, "step": 6612 }, { "epoch": 0.88, "grad_norm": 0.3125, "learning_rate": 4.659254874434347e-05, "loss": 2.2605, "step": 6613 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.659148187730712e-05, "loss": 2.2431, "step": 6614 }, { "epoch": 0.88, "grad_norm": 0.279296875, "learning_rate": 4.659041485549822e-05, "loss": 2.2446, "step": 6615 }, { "epoch": 0.88, "grad_norm": 0.310546875, "learning_rate": 4.658934767892442e-05, "loss": 2.2503, "step": 6616 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.6588280347593395e-05, "loss": 2.2984, "step": 6617 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.6587212861512765e-05, "loss": 2.2688, "step": 6618 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.658614522069019e-05, "loss": 2.2459, "step": 6619 }, { "epoch": 0.88, "grad_norm": 0.306640625, "learning_rate": 4.658507742513334e-05, "loss": 2.2592, "step": 6620 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.6584009474849846e-05, "loss": 2.2136, "step": 6621 }, { "epoch": 0.88, "grad_norm": 0.2890625, "learning_rate": 4.6582941369847385e-05, "loss": 2.2528, "step": 6622 }, { "epoch": 0.88, "grad_norm": 0.28515625, "learning_rate": 4.65818731101336e-05, "loss": 2.2672, "step": 6623 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.658080469571614e-05, "loss": 2.2809, "step": 6624 }, { "epoch": 0.88, "grad_norm": 0.29296875, "learning_rate": 4.657973612660268e-05, "loss": 2.2457, "step": 6625 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.657866740280088e-05, "loss": 2.2463, "step": 6626 }, { "epoch": 0.88, "grad_norm": 0.30078125, "learning_rate": 4.6577598524318396e-05, "loss": 2.2603, "step": 6627 }, { "epoch": 0.88, "grad_norm": 0.283203125, "learning_rate": 4.6576529491162886e-05, "loss": 2.2422, "step": 6628 }, { "epoch": 0.88, "grad_norm": 0.3125, "learning_rate": 4.657546030334201e-05, "loss": 2.2559, "step": 6629 }, { "epoch": 0.88, "grad_norm": 0.296875, "learning_rate": 4.657439096086345e-05, "loss": 2.2524, "step": 6630 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.657332146373485e-05, "loss": 2.2483, "step": 6631 }, { "epoch": 0.88, "grad_norm": 0.3046875, "learning_rate": 4.657225181196389e-05, "loss": 2.2782, "step": 6632 }, { "epoch": 0.88, "grad_norm": 0.294921875, "learning_rate": 4.657118200555823e-05, "loss": 2.2333, "step": 6633 }, { "epoch": 0.88, "grad_norm": 0.291015625, "learning_rate": 4.657011204452555e-05, "loss": 2.2401, "step": 6634 }, { "epoch": 0.89, "grad_norm": 0.306640625, "learning_rate": 4.6569041928873514e-05, "loss": 2.2567, "step": 6635 }, { "epoch": 0.89, "grad_norm": 0.283203125, "learning_rate": 4.656797165860978e-05, "loss": 2.2743, "step": 6636 }, { "epoch": 0.89, "grad_norm": 0.283203125, "learning_rate": 4.656690123374204e-05, "loss": 2.2673, "step": 6637 }, { "epoch": 0.89, "grad_norm": 0.28125, "learning_rate": 4.656583065427795e-05, "loss": 2.2652, "step": 6638 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.65647599202252e-05, "loss": 2.2789, "step": 6639 }, { "epoch": 0.89, "grad_norm": 0.26953125, "learning_rate": 4.656368903159145e-05, "loss": 2.2675, "step": 6640 }, { "epoch": 0.89, "grad_norm": 0.27734375, "learning_rate": 4.656261798838439e-05, "loss": 2.2496, "step": 6641 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.656154679061169e-05, "loss": 2.2377, "step": 6642 }, { "epoch": 0.89, "grad_norm": 0.302734375, "learning_rate": 4.6560475438281024e-05, "loss": 2.2435, "step": 6643 }, { "epoch": 0.89, "grad_norm": 0.287109375, "learning_rate": 4.655940393140008e-05, "loss": 2.2572, "step": 6644 }, { "epoch": 0.89, "grad_norm": 0.30078125, "learning_rate": 4.655833226997654e-05, "loss": 2.2643, "step": 6645 }, { "epoch": 0.89, "grad_norm": 0.30078125, "learning_rate": 4.6557260454018075e-05, "loss": 2.306, "step": 6646 }, { "epoch": 0.89, "grad_norm": 0.302734375, "learning_rate": 4.6556188483532384e-05, "loss": 2.2876, "step": 6647 }, { "epoch": 0.89, "grad_norm": 0.287109375, "learning_rate": 4.655511635852713e-05, "loss": 2.2492, "step": 6648 }, { "epoch": 0.89, "grad_norm": 0.310546875, "learning_rate": 4.6554044079010016e-05, "loss": 2.2713, "step": 6649 }, { "epoch": 0.89, "grad_norm": 0.302734375, "learning_rate": 4.655297164498873e-05, "loss": 2.2337, "step": 6650 }, { "epoch": 0.89, "grad_norm": 0.28515625, "learning_rate": 4.655189905647095e-05, "loss": 2.2352, "step": 6651 }, { "epoch": 0.89, "grad_norm": 0.298828125, "learning_rate": 4.6550826313464355e-05, "loss": 2.2593, "step": 6652 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.654975341597666e-05, "loss": 2.2531, "step": 6653 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.6548680364015535e-05, "loss": 2.2448, "step": 6654 }, { "epoch": 0.89, "grad_norm": 0.3046875, "learning_rate": 4.654760715758868e-05, "loss": 2.2851, "step": 6655 }, { "epoch": 0.89, "grad_norm": 0.3046875, "learning_rate": 4.6546533796703784e-05, "loss": 2.2397, "step": 6656 }, { "epoch": 0.89, "grad_norm": 0.296875, "learning_rate": 4.654546028136855e-05, "loss": 2.2373, "step": 6657 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.654438661159067e-05, "loss": 2.2368, "step": 6658 }, { "epoch": 0.89, "grad_norm": 0.3125, "learning_rate": 4.654331278737783e-05, "loss": 2.2639, "step": 6659 }, { "epoch": 0.89, "grad_norm": 0.296875, "learning_rate": 4.6542238808737734e-05, "loss": 2.2655, "step": 6660 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.654116467567809e-05, "loss": 2.2737, "step": 6661 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.654009038820658e-05, "loss": 2.2698, "step": 6662 }, { "epoch": 0.89, "grad_norm": 0.27734375, "learning_rate": 4.653901594633092e-05, "loss": 2.2569, "step": 6663 }, { "epoch": 0.89, "grad_norm": 0.29296875, "learning_rate": 4.6537941350058814e-05, "loss": 2.2581, "step": 6664 }, { "epoch": 0.89, "grad_norm": 0.287109375, "learning_rate": 4.6536866599397944e-05, "loss": 2.2643, "step": 6665 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.653579169435602e-05, "loss": 2.262, "step": 6666 }, { "epoch": 0.89, "grad_norm": 0.279296875, "learning_rate": 4.653471663494077e-05, "loss": 2.2684, "step": 6667 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.653364142115988e-05, "loss": 2.2772, "step": 6668 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.653256605302105e-05, "loss": 2.2451, "step": 6669 }, { "epoch": 0.89, "grad_norm": 0.29296875, "learning_rate": 4.653149053053202e-05, "loss": 2.279, "step": 6670 }, { "epoch": 0.89, "grad_norm": 0.298828125, "learning_rate": 4.653041485370047e-05, "loss": 2.259, "step": 6671 }, { "epoch": 0.89, "grad_norm": 0.283203125, "learning_rate": 4.6529339022534114e-05, "loss": 2.2676, "step": 6672 }, { "epoch": 0.89, "grad_norm": 0.306640625, "learning_rate": 4.6528263037040675e-05, "loss": 2.2412, "step": 6673 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.652718689722786e-05, "loss": 2.2749, "step": 6674 }, { "epoch": 0.89, "grad_norm": 0.283203125, "learning_rate": 4.652611060310339e-05, "loss": 2.2556, "step": 6675 }, { "epoch": 0.89, "grad_norm": 0.29296875, "learning_rate": 4.652503415467497e-05, "loss": 2.2473, "step": 6676 }, { "epoch": 0.89, "grad_norm": 0.275390625, "learning_rate": 4.652395755195032e-05, "loss": 2.2805, "step": 6677 }, { "epoch": 0.89, "grad_norm": 0.310546875, "learning_rate": 4.652288079493715e-05, "loss": 2.2306, "step": 6678 }, { "epoch": 0.89, "grad_norm": 0.294921875, "learning_rate": 4.65218038836432e-05, "loss": 2.2944, "step": 6679 }, { "epoch": 0.89, "grad_norm": 0.3046875, "learning_rate": 4.6520726818076165e-05, "loss": 2.2776, "step": 6680 }, { "epoch": 0.89, "grad_norm": 0.294921875, "learning_rate": 4.651964959824379e-05, "loss": 2.2666, "step": 6681 }, { "epoch": 0.89, "grad_norm": 0.39453125, "learning_rate": 4.651857222415377e-05, "loss": 2.2397, "step": 6682 }, { "epoch": 0.89, "grad_norm": 0.287109375, "learning_rate": 4.651749469581384e-05, "loss": 2.2961, "step": 6683 }, { "epoch": 0.89, "grad_norm": 0.302734375, "learning_rate": 4.651641701323173e-05, "loss": 2.2647, "step": 6684 }, { "epoch": 0.89, "grad_norm": 0.31640625, "learning_rate": 4.651533917641516e-05, "loss": 2.2682, "step": 6685 }, { "epoch": 0.89, "grad_norm": 0.3046875, "learning_rate": 4.6514261185371856e-05, "loss": 2.2681, "step": 6686 }, { "epoch": 0.89, "grad_norm": 0.296875, "learning_rate": 4.6513183040109545e-05, "loss": 2.2651, "step": 6687 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.6512104740635955e-05, "loss": 2.2266, "step": 6688 }, { "epoch": 0.89, "grad_norm": 0.298828125, "learning_rate": 4.651102628695881e-05, "loss": 2.246, "step": 6689 }, { "epoch": 0.89, "grad_norm": 0.3046875, "learning_rate": 4.6509947679085865e-05, "loss": 2.2415, "step": 6690 }, { "epoch": 0.89, "grad_norm": 0.27734375, "learning_rate": 4.6508868917024815e-05, "loss": 2.2499, "step": 6691 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.6507790000783426e-05, "loss": 2.2766, "step": 6692 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.650671093036941e-05, "loss": 2.2692, "step": 6693 }, { "epoch": 0.89, "grad_norm": 0.30078125, "learning_rate": 4.650563170579051e-05, "loss": 2.2757, "step": 6694 }, { "epoch": 0.89, "grad_norm": 0.28515625, "learning_rate": 4.6504552327054455e-05, "loss": 2.2507, "step": 6695 }, { "epoch": 0.89, "grad_norm": 0.287109375, "learning_rate": 4.6503472794169e-05, "loss": 2.2989, "step": 6696 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.650239310714186e-05, "loss": 2.2886, "step": 6697 }, { "epoch": 0.89, "grad_norm": 0.2890625, "learning_rate": 4.65013132659808e-05, "loss": 2.2555, "step": 6698 }, { "epoch": 0.89, "grad_norm": 0.294921875, "learning_rate": 4.650023327069354e-05, "loss": 2.242, "step": 6699 }, { "epoch": 0.89, "grad_norm": 0.30859375, "learning_rate": 4.649915312128782e-05, "loss": 2.2652, "step": 6700 }, { "epoch": 0.89, "grad_norm": 0.29296875, "learning_rate": 4.64980728177714e-05, "loss": 2.2915, "step": 6701 }, { "epoch": 0.89, "grad_norm": 0.3125, "learning_rate": 4.6496992360152016e-05, "loss": 2.2692, "step": 6702 }, { "epoch": 0.89, "grad_norm": 0.298828125, "learning_rate": 4.649591174843742e-05, "loss": 2.2657, "step": 6703 }, { "epoch": 0.89, "grad_norm": 0.296875, "learning_rate": 4.649483098263533e-05, "loss": 2.2708, "step": 6704 }, { "epoch": 0.89, "grad_norm": 0.302734375, "learning_rate": 4.649375006275353e-05, "loss": 2.2724, "step": 6705 }, { "epoch": 0.89, "grad_norm": 0.279296875, "learning_rate": 4.649266898879974e-05, "loss": 2.2469, "step": 6706 }, { "epoch": 0.89, "grad_norm": 0.30078125, "learning_rate": 4.649158776078173e-05, "loss": 2.311, "step": 6707 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.6490506378707246e-05, "loss": 2.2436, "step": 6708 }, { "epoch": 0.89, "grad_norm": 0.291015625, "learning_rate": 4.648942484258402e-05, "loss": 2.2853, "step": 6709 }, { "epoch": 0.9, "grad_norm": 0.3046875, "learning_rate": 4.6488343152419836e-05, "loss": 2.2625, "step": 6710 }, { "epoch": 0.9, "grad_norm": 0.298828125, "learning_rate": 4.648726130822242e-05, "loss": 2.2781, "step": 6711 }, { "epoch": 0.9, "grad_norm": 0.30078125, "learning_rate": 4.648617930999954e-05, "loss": 2.2926, "step": 6712 }, { "epoch": 0.9, "grad_norm": 0.298828125, "learning_rate": 4.648509715775895e-05, "loss": 2.2503, "step": 6713 }, { "epoch": 0.9, "grad_norm": 0.279296875, "learning_rate": 4.648401485150841e-05, "loss": 2.2714, "step": 6714 }, { "epoch": 0.9, "grad_norm": 0.28125, "learning_rate": 4.6482932391255675e-05, "loss": 2.2692, "step": 6715 }, { "epoch": 0.9, "grad_norm": 0.291015625, "learning_rate": 4.648184977700851e-05, "loss": 2.2638, "step": 6716 }, { "epoch": 0.9, "grad_norm": 0.294921875, "learning_rate": 4.648076700877466e-05, "loss": 2.2549, "step": 6717 }, { "epoch": 0.9, "grad_norm": 0.296875, "learning_rate": 4.6479684086561906e-05, "loss": 2.239, "step": 6718 }, { "epoch": 0.9, "grad_norm": 0.310546875, "learning_rate": 4.6478601010378e-05, "loss": 2.2365, "step": 6719 }, { "epoch": 0.9, "grad_norm": 0.296875, "learning_rate": 4.647751778023071e-05, "loss": 2.2634, "step": 6720 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.6476434396127786e-05, "loss": 2.2876, "step": 6721 }, { "epoch": 0.9, "grad_norm": 0.279296875, "learning_rate": 4.647535085807702e-05, "loss": 2.2721, "step": 6722 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.647426716608616e-05, "loss": 2.2699, "step": 6723 }, { "epoch": 0.9, "grad_norm": 0.298828125, "learning_rate": 4.6473183320162985e-05, "loss": 2.2558, "step": 6724 }, { "epoch": 0.9, "grad_norm": 0.294921875, "learning_rate": 4.647209932031525e-05, "loss": 2.2732, "step": 6725 }, { "epoch": 0.9, "grad_norm": 0.27734375, "learning_rate": 4.647101516655074e-05, "loss": 2.2685, "step": 6726 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.6469930858877215e-05, "loss": 2.2656, "step": 6727 }, { "epoch": 0.9, "grad_norm": 0.291015625, "learning_rate": 4.6468846397302454e-05, "loss": 2.2617, "step": 6728 }, { "epoch": 0.9, "grad_norm": 0.30859375, "learning_rate": 4.646776178183423e-05, "loss": 2.2779, "step": 6729 }, { "epoch": 0.9, "grad_norm": 0.291015625, "learning_rate": 4.646667701248032e-05, "loss": 2.2437, "step": 6730 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.6465592089248496e-05, "loss": 2.2418, "step": 6731 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.646450701214654e-05, "loss": 2.2409, "step": 6732 }, { "epoch": 0.9, "grad_norm": 0.29296875, "learning_rate": 4.646342178118222e-05, "loss": 2.2575, "step": 6733 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.6462336396363324e-05, "loss": 2.2918, "step": 6734 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.646125085769763e-05, "loss": 2.2777, "step": 6735 }, { "epoch": 0.9, "grad_norm": 0.294921875, "learning_rate": 4.646016516519292e-05, "loss": 2.2519, "step": 6736 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.645907931885698e-05, "loss": 2.2242, "step": 6737 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.645799331869758e-05, "loss": 2.2547, "step": 6738 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.6456907164722516e-05, "loss": 2.3016, "step": 6739 }, { "epoch": 0.9, "grad_norm": 0.30078125, "learning_rate": 4.645582085693958e-05, "loss": 2.2528, "step": 6740 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.6454734395356545e-05, "loss": 2.2206, "step": 6741 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.64536477799812e-05, "loss": 2.2833, "step": 6742 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.645256101082135e-05, "loss": 2.2447, "step": 6743 }, { "epoch": 0.9, "grad_norm": 0.279296875, "learning_rate": 4.645147408788476e-05, "loss": 2.2731, "step": 6744 }, { "epoch": 0.9, "grad_norm": 0.28125, "learning_rate": 4.645038701117924e-05, "loss": 2.2765, "step": 6745 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.6449299780712585e-05, "loss": 2.2844, "step": 6746 }, { "epoch": 0.9, "grad_norm": 0.306640625, "learning_rate": 4.644821239649257e-05, "loss": 2.2498, "step": 6747 }, { "epoch": 0.9, "grad_norm": 0.28125, "learning_rate": 4.644712485852701e-05, "loss": 2.245, "step": 6748 }, { "epoch": 0.9, "grad_norm": 0.291015625, "learning_rate": 4.644603716682369e-05, "loss": 2.2859, "step": 6749 }, { "epoch": 0.9, "grad_norm": 0.275390625, "learning_rate": 4.644494932139041e-05, "loss": 2.2566, "step": 6750 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.644386132223496e-05, "loss": 2.2613, "step": 6751 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.644277316936515e-05, "loss": 2.249, "step": 6752 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.644168486278878e-05, "loss": 2.2311, "step": 6753 }, { "epoch": 0.9, "grad_norm": 0.28125, "learning_rate": 4.6440596402513634e-05, "loss": 2.2467, "step": 6754 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.643950778854753e-05, "loss": 2.2768, "step": 6755 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.643841902089827e-05, "loss": 2.2857, "step": 6756 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.643733009957366e-05, "loss": 2.2682, "step": 6757 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.6436241024581506e-05, "loss": 2.2959, "step": 6758 }, { "epoch": 0.9, "grad_norm": 0.296875, "learning_rate": 4.6435151795929605e-05, "loss": 2.2547, "step": 6759 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.643406241362577e-05, "loss": 2.2343, "step": 6760 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.643297287767782e-05, "loss": 2.3067, "step": 6761 }, { "epoch": 0.9, "grad_norm": 0.30859375, "learning_rate": 4.6431883188093544e-05, "loss": 2.2856, "step": 6762 }, { "epoch": 0.9, "grad_norm": 0.30078125, "learning_rate": 4.643079334488077e-05, "loss": 2.2382, "step": 6763 }, { "epoch": 0.9, "grad_norm": 0.296875, "learning_rate": 4.6429703348047304e-05, "loss": 2.2845, "step": 6764 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.642861319760097e-05, "loss": 2.2393, "step": 6765 }, { "epoch": 0.9, "grad_norm": 0.283203125, "learning_rate": 4.642752289354956e-05, "loss": 2.2401, "step": 6766 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.6426432435900916e-05, "loss": 2.27, "step": 6767 }, { "epoch": 0.9, "grad_norm": 0.3046875, "learning_rate": 4.6425341824662824e-05, "loss": 2.2443, "step": 6768 }, { "epoch": 0.9, "grad_norm": 0.287109375, "learning_rate": 4.642425105984313e-05, "loss": 2.2814, "step": 6769 }, { "epoch": 0.9, "grad_norm": 0.30078125, "learning_rate": 4.642316014144964e-05, "loss": 2.2813, "step": 6770 }, { "epoch": 0.9, "grad_norm": 0.28125, "learning_rate": 4.642206906949018e-05, "loss": 2.2735, "step": 6771 }, { "epoch": 0.9, "grad_norm": 0.3203125, "learning_rate": 4.642097784397256e-05, "loss": 2.245, "step": 6772 }, { "epoch": 0.9, "grad_norm": 0.29296875, "learning_rate": 4.641988646490462e-05, "loss": 2.2784, "step": 6773 }, { "epoch": 0.9, "grad_norm": 0.29296875, "learning_rate": 4.6418794932294166e-05, "loss": 2.2711, "step": 6774 }, { "epoch": 0.9, "grad_norm": 0.279296875, "learning_rate": 4.6417703246149024e-05, "loss": 2.2701, "step": 6775 }, { "epoch": 0.9, "grad_norm": 0.296875, "learning_rate": 4.6416611406477026e-05, "loss": 2.2484, "step": 6776 }, { "epoch": 0.9, "grad_norm": 0.314453125, "learning_rate": 4.6415519413286005e-05, "loss": 2.2409, "step": 6777 }, { "epoch": 0.9, "grad_norm": 0.3125, "learning_rate": 4.6414427266583774e-05, "loss": 2.2934, "step": 6778 }, { "epoch": 0.9, "grad_norm": 0.294921875, "learning_rate": 4.6413334966378166e-05, "loss": 2.2447, "step": 6779 }, { "epoch": 0.9, "grad_norm": 0.302734375, "learning_rate": 4.6412242512677016e-05, "loss": 2.2553, "step": 6780 }, { "epoch": 0.9, "grad_norm": 0.2890625, "learning_rate": 4.641114990548815e-05, "loss": 2.2978, "step": 6781 }, { "epoch": 0.9, "grad_norm": 0.310546875, "learning_rate": 4.6410057144819405e-05, "loss": 2.2872, "step": 6782 }, { "epoch": 0.9, "grad_norm": 0.30078125, "learning_rate": 4.6408964230678605e-05, "loss": 2.2265, "step": 6783 }, { "epoch": 0.9, "grad_norm": 0.28515625, "learning_rate": 4.6407871163073596e-05, "loss": 2.2746, "step": 6784 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.64067779420122e-05, "loss": 2.2315, "step": 6785 }, { "epoch": 0.91, "grad_norm": 0.3203125, "learning_rate": 4.6405684567502274e-05, "loss": 2.2927, "step": 6786 }, { "epoch": 0.91, "grad_norm": 0.302734375, "learning_rate": 4.6404591039551636e-05, "loss": 2.2735, "step": 6787 }, { "epoch": 0.91, "grad_norm": 0.30078125, "learning_rate": 4.640349735816813e-05, "loss": 2.2315, "step": 6788 }, { "epoch": 0.91, "grad_norm": 0.27734375, "learning_rate": 4.64024035233596e-05, "loss": 2.2609, "step": 6789 }, { "epoch": 0.91, "grad_norm": 0.3125, "learning_rate": 4.6401309535133885e-05, "loss": 2.2592, "step": 6790 }, { "epoch": 0.91, "grad_norm": 0.287109375, "learning_rate": 4.6400215393498816e-05, "loss": 2.2388, "step": 6791 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.639912109846226e-05, "loss": 2.2692, "step": 6792 }, { "epoch": 0.91, "grad_norm": 0.28125, "learning_rate": 4.639802665003204e-05, "loss": 2.2566, "step": 6793 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.639693204821601e-05, "loss": 2.2461, "step": 6794 }, { "epoch": 0.91, "grad_norm": 0.2890625, "learning_rate": 4.639583729302202e-05, "loss": 2.2765, "step": 6795 }, { "epoch": 0.91, "grad_norm": 0.306640625, "learning_rate": 4.63947423844579e-05, "loss": 2.2289, "step": 6796 }, { "epoch": 0.91, "grad_norm": 0.3125, "learning_rate": 4.6393647322531516e-05, "loss": 2.2413, "step": 6797 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.639255210725071e-05, "loss": 2.2449, "step": 6798 }, { "epoch": 0.91, "grad_norm": 0.2890625, "learning_rate": 4.639145673862334e-05, "loss": 2.298, "step": 6799 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.6390361216657254e-05, "loss": 2.2935, "step": 6800 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.6389265541360296e-05, "loss": 2.2483, "step": 6801 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.638816971274034e-05, "loss": 2.2615, "step": 6802 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.638707373080522e-05, "loss": 2.2694, "step": 6803 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.63859775955628e-05, "loss": 2.259, "step": 6804 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.638488130702095e-05, "loss": 2.2146, "step": 6805 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6383784865187504e-05, "loss": 2.2776, "step": 6806 }, { "epoch": 0.91, "grad_norm": 0.279296875, "learning_rate": 4.638268827007034e-05, "loss": 2.2713, "step": 6807 }, { "epoch": 0.91, "grad_norm": 0.30078125, "learning_rate": 4.638159152167731e-05, "loss": 2.223, "step": 6808 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.638049462001628e-05, "loss": 2.2347, "step": 6809 }, { "epoch": 0.91, "grad_norm": 0.28125, "learning_rate": 4.637939756509512e-05, "loss": 2.2631, "step": 6810 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6378300356921675e-05, "loss": 2.2492, "step": 6811 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.637720299550383e-05, "loss": 2.2708, "step": 6812 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.637610548084943e-05, "loss": 2.2635, "step": 6813 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.637500781296637e-05, "loss": 2.2511, "step": 6814 }, { "epoch": 0.91, "grad_norm": 0.283203125, "learning_rate": 4.637390999186249e-05, "loss": 2.27, "step": 6815 }, { "epoch": 0.91, "grad_norm": 0.28515625, "learning_rate": 4.6372812017545675e-05, "loss": 2.2232, "step": 6816 }, { "epoch": 0.91, "grad_norm": 0.28125, "learning_rate": 4.637171389002379e-05, "loss": 2.2668, "step": 6817 }, { "epoch": 0.91, "grad_norm": 0.28515625, "learning_rate": 4.6370615609304715e-05, "loss": 2.2504, "step": 6818 }, { "epoch": 0.91, "grad_norm": 0.302734375, "learning_rate": 4.6369517175396315e-05, "loss": 2.288, "step": 6819 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.636841858830646e-05, "loss": 2.2435, "step": 6820 }, { "epoch": 0.91, "grad_norm": 0.310546875, "learning_rate": 4.636731984804304e-05, "loss": 2.3022, "step": 6821 }, { "epoch": 0.91, "grad_norm": 0.279296875, "learning_rate": 4.6366220954613916e-05, "loss": 2.2509, "step": 6822 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6365121908026975e-05, "loss": 2.2735, "step": 6823 }, { "epoch": 0.91, "grad_norm": 0.31640625, "learning_rate": 4.636402270829009e-05, "loss": 2.2382, "step": 6824 }, { "epoch": 0.91, "grad_norm": 0.3046875, "learning_rate": 4.6362923355411134e-05, "loss": 2.2461, "step": 6825 }, { "epoch": 0.91, "grad_norm": 0.275390625, "learning_rate": 4.6361823849398e-05, "loss": 2.2505, "step": 6826 }, { "epoch": 0.91, "grad_norm": 0.306640625, "learning_rate": 4.6360724190258566e-05, "loss": 2.239, "step": 6827 }, { "epoch": 0.91, "grad_norm": 0.2890625, "learning_rate": 4.635962437800071e-05, "loss": 2.2924, "step": 6828 }, { "epoch": 0.91, "grad_norm": 0.28515625, "learning_rate": 4.635852441263232e-05, "loss": 2.2483, "step": 6829 }, { "epoch": 0.91, "grad_norm": 0.287109375, "learning_rate": 4.635742429416128e-05, "loss": 2.252, "step": 6830 }, { "epoch": 0.91, "grad_norm": 0.306640625, "learning_rate": 4.6356324022595465e-05, "loss": 2.2871, "step": 6831 }, { "epoch": 0.91, "grad_norm": 0.29296875, "learning_rate": 4.6355223597942786e-05, "loss": 2.276, "step": 6832 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.635412302021111e-05, "loss": 2.238, "step": 6833 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.635302228940833e-05, "loss": 2.2814, "step": 6834 }, { "epoch": 0.91, "grad_norm": 0.29296875, "learning_rate": 4.635192140554235e-05, "loss": 2.2397, "step": 6835 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.635082036862104e-05, "loss": 2.2802, "step": 6836 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.6349719178652306e-05, "loss": 2.2625, "step": 6837 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.6348617835644046e-05, "loss": 2.2669, "step": 6838 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.6347516339604145e-05, "loss": 2.2713, "step": 6839 }, { "epoch": 0.91, "grad_norm": 0.29296875, "learning_rate": 4.63464146905405e-05, "loss": 2.2888, "step": 6840 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.6345312888461015e-05, "loss": 2.2687, "step": 6841 }, { "epoch": 0.91, "grad_norm": 0.28125, "learning_rate": 4.634421093337357e-05, "loss": 2.2717, "step": 6842 }, { "epoch": 0.91, "grad_norm": 0.28125, "learning_rate": 4.634310882528608e-05, "loss": 2.2553, "step": 6843 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.6342006564206445e-05, "loss": 2.2381, "step": 6844 }, { "epoch": 0.91, "grad_norm": 0.283203125, "learning_rate": 4.634090415014256e-05, "loss": 2.2608, "step": 6845 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6339801583102335e-05, "loss": 2.2776, "step": 6846 }, { "epoch": 0.91, "grad_norm": 0.287109375, "learning_rate": 4.6338698863093666e-05, "loss": 2.2397, "step": 6847 }, { "epoch": 0.91, "grad_norm": 0.298828125, "learning_rate": 4.633759599012446e-05, "loss": 2.2773, "step": 6848 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6336492964202626e-05, "loss": 2.2584, "step": 6849 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.6335389785336064e-05, "loss": 2.2458, "step": 6850 }, { "epoch": 0.91, "grad_norm": 0.3046875, "learning_rate": 4.633428645353268e-05, "loss": 2.283, "step": 6851 }, { "epoch": 0.91, "grad_norm": 0.29296875, "learning_rate": 4.63331829688004e-05, "loss": 2.2494, "step": 6852 }, { "epoch": 0.91, "grad_norm": 0.3046875, "learning_rate": 4.6332079331147116e-05, "loss": 2.2342, "step": 6853 }, { "epoch": 0.91, "grad_norm": 0.30859375, "learning_rate": 4.633097554058074e-05, "loss": 2.2503, "step": 6854 }, { "epoch": 0.91, "grad_norm": 0.296875, "learning_rate": 4.63298715971092e-05, "loss": 2.2671, "step": 6855 }, { "epoch": 0.91, "grad_norm": 0.30859375, "learning_rate": 4.632876750074039e-05, "loss": 2.247, "step": 6856 }, { "epoch": 0.91, "grad_norm": 0.294921875, "learning_rate": 4.6327663251482245e-05, "loss": 2.2532, "step": 6857 }, { "epoch": 0.91, "grad_norm": 0.275390625, "learning_rate": 4.632655884934266e-05, "loss": 2.2362, "step": 6858 }, { "epoch": 0.91, "grad_norm": 0.291015625, "learning_rate": 4.632545429432956e-05, "loss": 2.2868, "step": 6859 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.6324349586450864e-05, "loss": 2.2679, "step": 6860 }, { "epoch": 0.92, "grad_norm": 0.28125, "learning_rate": 4.6323244725714496e-05, "loss": 2.2566, "step": 6861 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.632213971212836e-05, "loss": 2.2603, "step": 6862 }, { "epoch": 0.92, "grad_norm": 0.28515625, "learning_rate": 4.632103454570039e-05, "loss": 2.2692, "step": 6863 }, { "epoch": 0.92, "grad_norm": 0.306640625, "learning_rate": 4.6319929226438506e-05, "loss": 2.2381, "step": 6864 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.631882375435064e-05, "loss": 2.2588, "step": 6865 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.6317718129444694e-05, "loss": 2.2714, "step": 6866 }, { "epoch": 0.92, "grad_norm": 0.27734375, "learning_rate": 4.6316612351728604e-05, "loss": 2.2765, "step": 6867 }, { "epoch": 0.92, "grad_norm": 0.302734375, "learning_rate": 4.631550642121031e-05, "loss": 2.2534, "step": 6868 }, { "epoch": 0.92, "grad_norm": 0.2890625, "learning_rate": 4.631440033789771e-05, "loss": 2.2647, "step": 6869 }, { "epoch": 0.92, "grad_norm": 0.296875, "learning_rate": 4.6313294101798764e-05, "loss": 2.2844, "step": 6870 }, { "epoch": 0.92, "grad_norm": 0.302734375, "learning_rate": 4.6312187712921385e-05, "loss": 2.2484, "step": 6871 }, { "epoch": 0.92, "grad_norm": 0.291015625, "learning_rate": 4.631108117127351e-05, "loss": 2.3021, "step": 6872 }, { "epoch": 0.92, "grad_norm": 0.28125, "learning_rate": 4.630997447686307e-05, "loss": 2.2892, "step": 6873 }, { "epoch": 0.92, "grad_norm": 0.2890625, "learning_rate": 4.6308867629697984e-05, "loss": 2.2722, "step": 6874 }, { "epoch": 0.92, "grad_norm": 0.291015625, "learning_rate": 4.630776062978621e-05, "loss": 2.2407, "step": 6875 }, { "epoch": 0.92, "grad_norm": 0.28515625, "learning_rate": 4.6306653477135665e-05, "loss": 2.2817, "step": 6876 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.6305546171754285e-05, "loss": 2.2378, "step": 6877 }, { "epoch": 0.92, "grad_norm": 0.291015625, "learning_rate": 4.630443871365002e-05, "loss": 2.2757, "step": 6878 }, { "epoch": 0.92, "grad_norm": 0.29296875, "learning_rate": 4.630333110283081e-05, "loss": 2.2762, "step": 6879 }, { "epoch": 0.92, "grad_norm": 0.3046875, "learning_rate": 4.630222333930458e-05, "loss": 2.2652, "step": 6880 }, { "epoch": 0.92, "grad_norm": 0.30078125, "learning_rate": 4.630111542307928e-05, "loss": 2.2393, "step": 6881 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.630000735416285e-05, "loss": 2.2492, "step": 6882 }, { "epoch": 0.92, "grad_norm": 0.275390625, "learning_rate": 4.629889913256323e-05, "loss": 2.2679, "step": 6883 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.6297790758288365e-05, "loss": 2.2817, "step": 6884 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.629668223134621e-05, "loss": 2.2587, "step": 6885 }, { "epoch": 0.92, "grad_norm": 0.314453125, "learning_rate": 4.6295573551744696e-05, "loss": 2.2923, "step": 6886 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.629446471949178e-05, "loss": 2.2655, "step": 6887 }, { "epoch": 0.92, "grad_norm": 0.310546875, "learning_rate": 4.62933557345954e-05, "loss": 2.2638, "step": 6888 }, { "epoch": 0.92, "grad_norm": 0.29296875, "learning_rate": 4.629224659706352e-05, "loss": 2.264, "step": 6889 }, { "epoch": 0.92, "grad_norm": 0.294921875, "learning_rate": 4.6291137306904075e-05, "loss": 2.2297, "step": 6890 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.6290027864125035e-05, "loss": 2.2753, "step": 6891 }, { "epoch": 0.92, "grad_norm": 0.296875, "learning_rate": 4.628891826873434e-05, "loss": 2.2966, "step": 6892 }, { "epoch": 0.92, "grad_norm": 0.3125, "learning_rate": 4.628780852073995e-05, "loss": 2.2557, "step": 6893 }, { "epoch": 0.92, "grad_norm": 0.291015625, "learning_rate": 4.6286698620149805e-05, "loss": 2.2486, "step": 6894 }, { "epoch": 0.92, "grad_norm": 0.296875, "learning_rate": 4.628558856697188e-05, "loss": 2.2611, "step": 6895 }, { "epoch": 0.92, "grad_norm": 0.279296875, "learning_rate": 4.6284478361214124e-05, "loss": 2.2701, "step": 6896 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.62833680028845e-05, "loss": 2.2495, "step": 6897 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.6282257491990954e-05, "loss": 2.2692, "step": 6898 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.628114682854146e-05, "loss": 2.249, "step": 6899 }, { "epoch": 0.92, "grad_norm": 0.32421875, "learning_rate": 4.628003601254397e-05, "loss": 2.2765, "step": 6900 }, { "epoch": 0.92, "grad_norm": 0.306640625, "learning_rate": 4.627892504400646e-05, "loss": 2.2393, "step": 6901 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.627781392293688e-05, "loss": 2.2441, "step": 6902 }, { "epoch": 0.92, "grad_norm": 0.29296875, "learning_rate": 4.6276702649343205e-05, "loss": 2.2709, "step": 6903 }, { "epoch": 0.92, "grad_norm": 0.30078125, "learning_rate": 4.627559122323339e-05, "loss": 2.261, "step": 6904 }, { "epoch": 0.92, "grad_norm": 0.302734375, "learning_rate": 4.627447964461541e-05, "loss": 2.2632, "step": 6905 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.627336791349723e-05, "loss": 2.2623, "step": 6906 }, { "epoch": 0.92, "grad_norm": 0.29296875, "learning_rate": 4.627225602988682e-05, "loss": 2.2719, "step": 6907 }, { "epoch": 0.92, "grad_norm": 0.322265625, "learning_rate": 4.6271143993792155e-05, "loss": 2.2626, "step": 6908 }, { "epoch": 0.92, "grad_norm": 0.27734375, "learning_rate": 4.627003180522119e-05, "loss": 2.2893, "step": 6909 }, { "epoch": 0.92, "grad_norm": 0.291015625, "learning_rate": 4.6268919464181914e-05, "loss": 2.2525, "step": 6910 }, { "epoch": 0.92, "grad_norm": 0.31640625, "learning_rate": 4.62678069706823e-05, "loss": 2.2559, "step": 6911 }, { "epoch": 0.92, "grad_norm": 0.302734375, "learning_rate": 4.626669432473032e-05, "loss": 2.2349, "step": 6912 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.626558152633394e-05, "loss": 2.2518, "step": 6913 }, { "epoch": 0.92, "grad_norm": 0.310546875, "learning_rate": 4.6264468575501143e-05, "loss": 2.2668, "step": 6914 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.6263355472239915e-05, "loss": 2.2429, "step": 6915 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.6262242216558224e-05, "loss": 2.2784, "step": 6916 }, { "epoch": 0.92, "grad_norm": 0.283203125, "learning_rate": 4.626112880846406e-05, "loss": 2.2478, "step": 6917 }, { "epoch": 0.92, "grad_norm": 0.30078125, "learning_rate": 4.6260015247965394e-05, "loss": 2.2609, "step": 6918 }, { "epoch": 0.92, "grad_norm": 0.28125, "learning_rate": 4.625890153507021e-05, "loss": 2.286, "step": 6919 }, { "epoch": 0.92, "grad_norm": 0.27734375, "learning_rate": 4.62577876697865e-05, "loss": 2.2569, "step": 6920 }, { "epoch": 0.92, "grad_norm": 0.275390625, "learning_rate": 4.625667365212224e-05, "loss": 2.2557, "step": 6921 }, { "epoch": 0.92, "grad_norm": 0.275390625, "learning_rate": 4.625555948208542e-05, "loss": 2.2638, "step": 6922 }, { "epoch": 0.92, "grad_norm": 0.28125, "learning_rate": 4.6254445159684025e-05, "loss": 2.237, "step": 6923 }, { "epoch": 0.92, "grad_norm": 0.2890625, "learning_rate": 4.6253330684926035e-05, "loss": 2.2666, "step": 6924 }, { "epoch": 0.92, "grad_norm": 0.298828125, "learning_rate": 4.625221605781945e-05, "loss": 2.2495, "step": 6925 }, { "epoch": 0.92, "grad_norm": 0.294921875, "learning_rate": 4.6251101278372267e-05, "loss": 2.2871, "step": 6926 }, { "epoch": 0.92, "grad_norm": 0.294921875, "learning_rate": 4.624998634659245e-05, "loss": 2.2462, "step": 6927 }, { "epoch": 0.92, "grad_norm": 0.306640625, "learning_rate": 4.6248871262488013e-05, "loss": 2.2656, "step": 6928 }, { "epoch": 0.92, "grad_norm": 0.28125, "learning_rate": 4.6247756026066945e-05, "loss": 2.262, "step": 6929 }, { "epoch": 0.92, "grad_norm": 0.306640625, "learning_rate": 4.6246640637337244e-05, "loss": 2.2495, "step": 6930 }, { "epoch": 0.92, "grad_norm": 0.2890625, "learning_rate": 4.624552509630689e-05, "loss": 2.2663, "step": 6931 }, { "epoch": 0.92, "grad_norm": 0.318359375, "learning_rate": 4.62444094029839e-05, "loss": 2.2825, "step": 6932 }, { "epoch": 0.92, "grad_norm": 0.287109375, "learning_rate": 4.624329355737625e-05, "loss": 2.2801, "step": 6933 }, { "epoch": 0.92, "grad_norm": 0.275390625, "learning_rate": 4.624217755949195e-05, "loss": 2.2579, "step": 6934 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6241061409339006e-05, "loss": 2.2661, "step": 6935 }, { "epoch": 0.93, "grad_norm": 0.306640625, "learning_rate": 4.623994510692541e-05, "loss": 2.2654, "step": 6936 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.623882865225916e-05, "loss": 2.2773, "step": 6937 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.623771204534827e-05, "loss": 2.2805, "step": 6938 }, { "epoch": 0.93, "grad_norm": 0.287109375, "learning_rate": 4.623659528620075e-05, "loss": 2.2664, "step": 6939 }, { "epoch": 0.93, "grad_norm": 0.3125, "learning_rate": 4.623547837482458e-05, "loss": 2.2654, "step": 6940 }, { "epoch": 0.93, "grad_norm": 0.30078125, "learning_rate": 4.623436131122778e-05, "loss": 2.2743, "step": 6941 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.623324409541837e-05, "loss": 2.2608, "step": 6942 }, { "epoch": 0.93, "grad_norm": 0.30078125, "learning_rate": 4.6232126727404335e-05, "loss": 2.2836, "step": 6943 }, { "epoch": 0.93, "grad_norm": 0.30859375, "learning_rate": 4.62310092071937e-05, "loss": 2.226, "step": 6944 }, { "epoch": 0.93, "grad_norm": 0.27734375, "learning_rate": 4.622989153479447e-05, "loss": 2.2487, "step": 6945 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.6228773710214656e-05, "loss": 2.2729, "step": 6946 }, { "epoch": 0.93, "grad_norm": 0.294921875, "learning_rate": 4.6227655733462277e-05, "loss": 2.2576, "step": 6947 }, { "epoch": 0.93, "grad_norm": 0.322265625, "learning_rate": 4.622653760454534e-05, "loss": 2.2688, "step": 6948 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.622541932347187e-05, "loss": 2.2436, "step": 6949 }, { "epoch": 0.93, "grad_norm": 0.302734375, "learning_rate": 4.622430089024987e-05, "loss": 2.2302, "step": 6950 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.622318230488736e-05, "loss": 2.2844, "step": 6951 }, { "epoch": 0.93, "grad_norm": 0.314453125, "learning_rate": 4.622206356739237e-05, "loss": 2.2453, "step": 6952 }, { "epoch": 0.93, "grad_norm": 0.275390625, "learning_rate": 4.6220944677772905e-05, "loss": 2.2524, "step": 6953 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.6219825636036995e-05, "loss": 2.2531, "step": 6954 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.621870644219266e-05, "loss": 2.2898, "step": 6955 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.6217587096247914e-05, "loss": 2.2363, "step": 6956 }, { "epoch": 0.93, "grad_norm": 0.32421875, "learning_rate": 4.621646759821079e-05, "loss": 2.2694, "step": 6957 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.6215347948089304e-05, "loss": 2.2662, "step": 6958 }, { "epoch": 0.93, "grad_norm": 0.3046875, "learning_rate": 4.62142281458915e-05, "loss": 2.2698, "step": 6959 }, { "epoch": 0.93, "grad_norm": 0.28125, "learning_rate": 4.621310819162539e-05, "loss": 2.2619, "step": 6960 }, { "epoch": 0.93, "grad_norm": 0.3046875, "learning_rate": 4.6211988085298994e-05, "loss": 2.227, "step": 6961 }, { "epoch": 0.93, "grad_norm": 0.29296875, "learning_rate": 4.621086782692036e-05, "loss": 2.2464, "step": 6962 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6209747416497506e-05, "loss": 2.2393, "step": 6963 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.620862685403847e-05, "loss": 2.2436, "step": 6964 }, { "epoch": 0.93, "grad_norm": 0.279296875, "learning_rate": 4.620750613955129e-05, "loss": 2.2748, "step": 6965 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.620638527304398e-05, "loss": 2.2626, "step": 6966 }, { "epoch": 0.93, "grad_norm": 0.302734375, "learning_rate": 4.6205264254524596e-05, "loss": 2.3094, "step": 6967 }, { "epoch": 0.93, "grad_norm": 0.302734375, "learning_rate": 4.620414308400116e-05, "loss": 2.2419, "step": 6968 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6203021761481714e-05, "loss": 2.2924, "step": 6969 }, { "epoch": 0.93, "grad_norm": 0.27734375, "learning_rate": 4.62019002869743e-05, "loss": 2.2007, "step": 6970 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.6200778660486944e-05, "loss": 2.2324, "step": 6971 }, { "epoch": 0.93, "grad_norm": 0.298828125, "learning_rate": 4.619965688202769e-05, "loss": 2.2976, "step": 6972 }, { "epoch": 0.93, "grad_norm": 0.287109375, "learning_rate": 4.619853495160459e-05, "loss": 2.2685, "step": 6973 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.619741286922568e-05, "loss": 2.2393, "step": 6974 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.6196290634898997e-05, "loss": 2.2243, "step": 6975 }, { "epoch": 0.93, "grad_norm": 0.298828125, "learning_rate": 4.619516824863259e-05, "loss": 2.2499, "step": 6976 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6194045710434514e-05, "loss": 2.2586, "step": 6977 }, { "epoch": 0.93, "grad_norm": 0.29296875, "learning_rate": 4.6192923020312805e-05, "loss": 2.2603, "step": 6978 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.6191800178275506e-05, "loss": 2.2601, "step": 6979 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.6190677184330675e-05, "loss": 2.2658, "step": 6980 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.618955403848637e-05, "loss": 2.2705, "step": 6981 }, { "epoch": 0.93, "grad_norm": 0.296875, "learning_rate": 4.618843074075062e-05, "loss": 2.2589, "step": 6982 }, { "epoch": 0.93, "grad_norm": 0.27734375, "learning_rate": 4.618730729113149e-05, "loss": 2.2367, "step": 6983 }, { "epoch": 0.93, "grad_norm": 0.28515625, "learning_rate": 4.618618368963703e-05, "loss": 2.2487, "step": 6984 }, { "epoch": 0.93, "grad_norm": 0.287109375, "learning_rate": 4.6185059936275296e-05, "loss": 2.2545, "step": 6985 }, { "epoch": 0.93, "grad_norm": 0.287109375, "learning_rate": 4.618393603105435e-05, "loss": 2.3002, "step": 6986 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.618281197398224e-05, "loss": 2.2862, "step": 6987 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.6181687765067015e-05, "loss": 2.2444, "step": 6988 }, { "epoch": 0.93, "grad_norm": 0.27734375, "learning_rate": 4.618056340431676e-05, "loss": 2.2576, "step": 6989 }, { "epoch": 0.93, "grad_norm": 0.302734375, "learning_rate": 4.61794388917395e-05, "loss": 2.2353, "step": 6990 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.617831422734333e-05, "loss": 2.2543, "step": 6991 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6177189411136286e-05, "loss": 2.2714, "step": 6992 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.6176064443126447e-05, "loss": 2.2562, "step": 6993 }, { "epoch": 0.93, "grad_norm": 0.26953125, "learning_rate": 4.6174939323321865e-05, "loss": 2.2502, "step": 6994 }, { "epoch": 0.93, "grad_norm": 0.29296875, "learning_rate": 4.617381405173061e-05, "loss": 2.2216, "step": 6995 }, { "epoch": 0.93, "grad_norm": 0.29296875, "learning_rate": 4.617268862836075e-05, "loss": 2.2689, "step": 6996 }, { "epoch": 0.93, "grad_norm": 0.294921875, "learning_rate": 4.6171563053220356e-05, "loss": 2.2718, "step": 6997 }, { "epoch": 0.93, "grad_norm": 0.30078125, "learning_rate": 4.6170437326317494e-05, "loss": 2.2276, "step": 6998 }, { "epoch": 0.93, "grad_norm": 0.306640625, "learning_rate": 4.616931144766022e-05, "loss": 2.2712, "step": 6999 }, { "epoch": 0.93, "grad_norm": 0.29296875, "learning_rate": 4.6168185417256624e-05, "loss": 2.2523, "step": 7000 }, { "epoch": 0.93, "eval_loss": 2.257249593734741, "eval_runtime": 615.5852, "eval_samples_per_second": 62.982, "eval_steps_per_second": 7.874, "step": 7000 }, { "epoch": 0.93, "grad_norm": 0.27734375, "learning_rate": 4.616705923511477e-05, "loss": 2.2529, "step": 7001 }, { "epoch": 0.93, "grad_norm": 0.2890625, "learning_rate": 4.616593290124272e-05, "loss": 2.27, "step": 7002 }, { "epoch": 0.93, "grad_norm": 0.283203125, "learning_rate": 4.616480641564857e-05, "loss": 2.2668, "step": 7003 }, { "epoch": 0.93, "grad_norm": 0.279296875, "learning_rate": 4.616367977834039e-05, "loss": 2.2853, "step": 7004 }, { "epoch": 0.93, "grad_norm": 0.298828125, "learning_rate": 4.6162552989326235e-05, "loss": 2.2529, "step": 7005 }, { "epoch": 0.93, "grad_norm": 0.30078125, "learning_rate": 4.6161426048614195e-05, "loss": 2.2689, "step": 7006 }, { "epoch": 0.93, "grad_norm": 0.298828125, "learning_rate": 4.616029895621236e-05, "loss": 2.2719, "step": 7007 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.6159171712128785e-05, "loss": 2.2634, "step": 7008 }, { "epoch": 0.93, "grad_norm": 0.291015625, "learning_rate": 4.615804431637157e-05, "loss": 2.2778, "step": 7009 }, { "epoch": 0.94, "grad_norm": 0.32421875, "learning_rate": 4.615691676894879e-05, "loss": 2.2299, "step": 7010 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.615578906986853e-05, "loss": 2.227, "step": 7011 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.615466121913887e-05, "loss": 2.2463, "step": 7012 }, { "epoch": 0.94, "grad_norm": 0.33203125, "learning_rate": 4.615353321676789e-05, "loss": 2.2675, "step": 7013 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.615240506276369e-05, "loss": 2.2959, "step": 7014 }, { "epoch": 0.94, "grad_norm": 0.29296875, "learning_rate": 4.6151276757134346e-05, "loss": 2.2601, "step": 7015 }, { "epoch": 0.94, "grad_norm": 0.27734375, "learning_rate": 4.615014829988794e-05, "loss": 2.2558, "step": 7016 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.614901969103258e-05, "loss": 2.2697, "step": 7017 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.614789093057634e-05, "loss": 2.2378, "step": 7018 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.614676201852732e-05, "loss": 2.2694, "step": 7019 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.6145632954893616e-05, "loss": 2.2554, "step": 7020 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.61445037396833e-05, "loss": 2.2313, "step": 7021 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.614337437290449e-05, "loss": 2.2805, "step": 7022 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.6142244854565266e-05, "loss": 2.2487, "step": 7023 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.6141115184673734e-05, "loss": 2.2605, "step": 7024 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.613998536323799e-05, "loss": 2.2752, "step": 7025 }, { "epoch": 0.94, "grad_norm": 0.28125, "learning_rate": 4.613885539026613e-05, "loss": 2.2631, "step": 7026 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.613772526576626e-05, "loss": 2.2587, "step": 7027 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.613659498974647e-05, "loss": 2.2444, "step": 7028 }, { "epoch": 0.94, "grad_norm": 0.294921875, "learning_rate": 4.613546456221488e-05, "loss": 2.259, "step": 7029 }, { "epoch": 0.94, "grad_norm": 0.30078125, "learning_rate": 4.6134333983179575e-05, "loss": 2.2784, "step": 7030 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.613320325264866e-05, "loss": 2.2541, "step": 7031 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.613207237063025e-05, "loss": 2.3053, "step": 7032 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.613094133713245e-05, "loss": 2.3008, "step": 7033 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.6129810152163364e-05, "loss": 2.2883, "step": 7034 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.6128678815731106e-05, "loss": 2.2727, "step": 7035 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.612754732784378e-05, "loss": 2.2666, "step": 7036 }, { "epoch": 0.94, "grad_norm": 0.294921875, "learning_rate": 4.612641568850949e-05, "loss": 2.279, "step": 7037 }, { "epoch": 0.94, "grad_norm": 0.275390625, "learning_rate": 4.612528389773637e-05, "loss": 2.2782, "step": 7038 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.612415195553251e-05, "loss": 2.2431, "step": 7039 }, { "epoch": 0.94, "grad_norm": 0.294921875, "learning_rate": 4.612301986190604e-05, "loss": 2.2346, "step": 7040 }, { "epoch": 0.94, "grad_norm": 0.3046875, "learning_rate": 4.612188761686505e-05, "loss": 2.2511, "step": 7041 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.612075522041769e-05, "loss": 2.2541, "step": 7042 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.611962267257206e-05, "loss": 2.2563, "step": 7043 }, { "epoch": 0.94, "grad_norm": 0.287109375, "learning_rate": 4.611848997333628e-05, "loss": 2.2522, "step": 7044 }, { "epoch": 0.94, "grad_norm": 0.287109375, "learning_rate": 4.611735712271847e-05, "loss": 2.2427, "step": 7045 }, { "epoch": 0.94, "grad_norm": 0.294921875, "learning_rate": 4.611622412072675e-05, "loss": 2.2517, "step": 7046 }, { "epoch": 0.94, "grad_norm": 0.32421875, "learning_rate": 4.611509096736923e-05, "loss": 2.2689, "step": 7047 }, { "epoch": 0.94, "grad_norm": 0.28125, "learning_rate": 4.611395766265406e-05, "loss": 2.242, "step": 7048 }, { "epoch": 0.94, "grad_norm": 0.294921875, "learning_rate": 4.6112824206589346e-05, "loss": 2.2445, "step": 7049 }, { "epoch": 0.94, "grad_norm": 0.283203125, "learning_rate": 4.6111690599183206e-05, "loss": 2.2584, "step": 7050 }, { "epoch": 0.94, "grad_norm": 0.28125, "learning_rate": 4.611055684044378e-05, "loss": 2.2826, "step": 7051 }, { "epoch": 0.94, "grad_norm": 0.302734375, "learning_rate": 4.610942293037919e-05, "loss": 2.284, "step": 7052 }, { "epoch": 0.94, "grad_norm": 0.3046875, "learning_rate": 4.610828886899756e-05, "loss": 2.2627, "step": 7053 }, { "epoch": 0.94, "grad_norm": 0.2734375, "learning_rate": 4.6107154656307025e-05, "loss": 2.2638, "step": 7054 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.610602029231572e-05, "loss": 2.2811, "step": 7055 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.610488577703176e-05, "loss": 2.2501, "step": 7056 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.61037511104633e-05, "loss": 2.2737, "step": 7057 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.6102616292618453e-05, "loss": 2.2568, "step": 7058 }, { "epoch": 0.94, "grad_norm": 0.30078125, "learning_rate": 4.6101481323505356e-05, "loss": 2.2933, "step": 7059 }, { "epoch": 0.94, "grad_norm": 0.30078125, "learning_rate": 4.610034620313216e-05, "loss": 2.2264, "step": 7060 }, { "epoch": 0.94, "grad_norm": 0.28125, "learning_rate": 4.609921093150699e-05, "loss": 2.2711, "step": 7061 }, { "epoch": 0.94, "grad_norm": 0.28125, "learning_rate": 4.6098075508637976e-05, "loss": 2.2731, "step": 7062 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.6096939934533276e-05, "loss": 2.243, "step": 7063 }, { "epoch": 0.94, "grad_norm": 0.283203125, "learning_rate": 4.609580420920102e-05, "loss": 2.2753, "step": 7064 }, { "epoch": 0.94, "grad_norm": 0.3046875, "learning_rate": 4.609466833264935e-05, "loss": 2.2809, "step": 7065 }, { "epoch": 0.94, "grad_norm": 0.29296875, "learning_rate": 4.609353230488641e-05, "loss": 2.268, "step": 7066 }, { "epoch": 0.94, "grad_norm": 0.306640625, "learning_rate": 4.609239612592033e-05, "loss": 2.2907, "step": 7067 }, { "epoch": 0.94, "grad_norm": 0.287109375, "learning_rate": 4.6091259795759276e-05, "loss": 2.2499, "step": 7068 }, { "epoch": 0.94, "grad_norm": 0.28515625, "learning_rate": 4.6090123314411383e-05, "loss": 2.2622, "step": 7069 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.60889866818848e-05, "loss": 2.2868, "step": 7070 }, { "epoch": 0.94, "grad_norm": 0.291015625, "learning_rate": 4.608784989818767e-05, "loss": 2.2583, "step": 7071 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.6086712963328145e-05, "loss": 2.2829, "step": 7072 }, { "epoch": 0.94, "grad_norm": 0.30078125, "learning_rate": 4.608557587731437e-05, "loss": 2.2676, "step": 7073 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.60844386401545e-05, "loss": 2.2395, "step": 7074 }, { "epoch": 0.94, "grad_norm": 0.3125, "learning_rate": 4.608330125185669e-05, "loss": 2.2702, "step": 7075 }, { "epoch": 0.94, "grad_norm": 0.283203125, "learning_rate": 4.6082163712429095e-05, "loss": 2.2904, "step": 7076 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.6081026021879856e-05, "loss": 2.2637, "step": 7077 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.607988818021715e-05, "loss": 2.2393, "step": 7078 }, { "epoch": 0.94, "grad_norm": 0.298828125, "learning_rate": 4.60787501874491e-05, "loss": 2.2757, "step": 7079 }, { "epoch": 0.94, "grad_norm": 0.27734375, "learning_rate": 4.60776120435839e-05, "loss": 2.2297, "step": 7080 }, { "epoch": 0.94, "grad_norm": 0.287109375, "learning_rate": 4.607647374862968e-05, "loss": 2.2746, "step": 7081 }, { "epoch": 0.94, "grad_norm": 0.30078125, "learning_rate": 4.607533530259461e-05, "loss": 2.2618, "step": 7082 }, { "epoch": 0.94, "grad_norm": 0.296875, "learning_rate": 4.607419670548686e-05, "loss": 2.2733, "step": 7083 }, { "epoch": 0.94, "grad_norm": 0.2890625, "learning_rate": 4.607305795731458e-05, "loss": 2.2301, "step": 7084 }, { "epoch": 0.95, "grad_norm": 0.318359375, "learning_rate": 4.6071919058085935e-05, "loss": 2.254, "step": 7085 }, { "epoch": 0.95, "grad_norm": 0.29296875, "learning_rate": 4.6070780007809086e-05, "loss": 2.2774, "step": 7086 }, { "epoch": 0.95, "grad_norm": 0.28515625, "learning_rate": 4.606964080649221e-05, "loss": 2.2604, "step": 7087 }, { "epoch": 0.95, "grad_norm": 0.3046875, "learning_rate": 4.606850145414347e-05, "loss": 2.2483, "step": 7088 }, { "epoch": 0.95, "grad_norm": 0.314453125, "learning_rate": 4.606736195077101e-05, "loss": 2.2477, "step": 7089 }, { "epoch": 0.95, "grad_norm": 0.279296875, "learning_rate": 4.6066222296383025e-05, "loss": 2.3117, "step": 7090 }, { "epoch": 0.95, "grad_norm": 0.2890625, "learning_rate": 4.606508249098768e-05, "loss": 2.2704, "step": 7091 }, { "epoch": 0.95, "grad_norm": 0.294921875, "learning_rate": 4.606394253459314e-05, "loss": 2.2532, "step": 7092 }, { "epoch": 0.95, "grad_norm": 0.294921875, "learning_rate": 4.606280242720757e-05, "loss": 2.2753, "step": 7093 }, { "epoch": 0.95, "grad_norm": 0.298828125, "learning_rate": 4.606166216883916e-05, "loss": 2.2887, "step": 7094 }, { "epoch": 0.95, "grad_norm": 0.294921875, "learning_rate": 4.606052175949607e-05, "loss": 2.254, "step": 7095 }, { "epoch": 0.95, "grad_norm": 0.279296875, "learning_rate": 4.605938119918648e-05, "loss": 2.2497, "step": 7096 }, { "epoch": 0.95, "grad_norm": 0.310546875, "learning_rate": 4.6058240487918565e-05, "loss": 2.2653, "step": 7097 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.6057099625700494e-05, "loss": 2.2857, "step": 7098 }, { "epoch": 0.95, "grad_norm": 0.28515625, "learning_rate": 4.605595861254046e-05, "loss": 2.2676, "step": 7099 }, { "epoch": 0.95, "grad_norm": 0.275390625, "learning_rate": 4.605481744844663e-05, "loss": 2.254, "step": 7100 }, { "epoch": 0.95, "grad_norm": 0.3046875, "learning_rate": 4.6053676133427195e-05, "loss": 2.2589, "step": 7101 }, { "epoch": 0.95, "grad_norm": 0.302734375, "learning_rate": 4.605253466749032e-05, "loss": 2.2449, "step": 7102 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.60513930506442e-05, "loss": 2.2532, "step": 7103 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.605025128289702e-05, "loss": 2.267, "step": 7104 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.604910936425695e-05, "loss": 2.2735, "step": 7105 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.604796729473219e-05, "loss": 2.2601, "step": 7106 }, { "epoch": 0.95, "grad_norm": 0.306640625, "learning_rate": 4.6046825074330925e-05, "loss": 2.2436, "step": 7107 }, { "epoch": 0.95, "grad_norm": 0.2734375, "learning_rate": 4.604568270306133e-05, "loss": 2.2628, "step": 7108 }, { "epoch": 0.95, "grad_norm": 0.291015625, "learning_rate": 4.604454018093161e-05, "loss": 2.2819, "step": 7109 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.604339750794995e-05, "loss": 2.2658, "step": 7110 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.604225468412453e-05, "loss": 2.2741, "step": 7111 }, { "epoch": 0.95, "grad_norm": 0.302734375, "learning_rate": 4.604111170946356e-05, "loss": 2.2237, "step": 7112 }, { "epoch": 0.95, "grad_norm": 0.29296875, "learning_rate": 4.603996858397522e-05, "loss": 2.3045, "step": 7113 }, { "epoch": 0.95, "grad_norm": 0.275390625, "learning_rate": 4.603882530766771e-05, "loss": 2.239, "step": 7114 }, { "epoch": 0.95, "grad_norm": 0.314453125, "learning_rate": 4.6037681880549224e-05, "loss": 2.2518, "step": 7115 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.6036538302627954e-05, "loss": 2.3035, "step": 7116 }, { "epoch": 0.95, "grad_norm": 0.287109375, "learning_rate": 4.6035394573912107e-05, "loss": 2.2991, "step": 7117 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.6034250694409866e-05, "loss": 2.2641, "step": 7118 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.6033106664129445e-05, "loss": 2.2859, "step": 7119 }, { "epoch": 0.95, "grad_norm": 0.298828125, "learning_rate": 4.603196248307904e-05, "loss": 2.2438, "step": 7120 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.603081815126686e-05, "loss": 2.2627, "step": 7121 }, { "epoch": 0.95, "grad_norm": 0.28515625, "learning_rate": 4.602967366870109e-05, "loss": 2.2381, "step": 7122 }, { "epoch": 0.95, "grad_norm": 0.2890625, "learning_rate": 4.602852903538995e-05, "loss": 2.2674, "step": 7123 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.602738425134164e-05, "loss": 2.2949, "step": 7124 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.6026239316564364e-05, "loss": 2.2778, "step": 7125 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.602509423106633e-05, "loss": 2.264, "step": 7126 }, { "epoch": 0.95, "grad_norm": 0.3125, "learning_rate": 4.602394899485575e-05, "loss": 2.2481, "step": 7127 }, { "epoch": 0.95, "grad_norm": 0.302734375, "learning_rate": 4.6022803607940826e-05, "loss": 2.2198, "step": 7128 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.602165807032977e-05, "loss": 2.2305, "step": 7129 }, { "epoch": 0.95, "grad_norm": 0.2890625, "learning_rate": 4.60205123820308e-05, "loss": 2.2774, "step": 7130 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.6019366543052124e-05, "loss": 2.2585, "step": 7131 }, { "epoch": 0.95, "grad_norm": 0.310546875, "learning_rate": 4.6018220553401957e-05, "loss": 2.2634, "step": 7132 }, { "epoch": 0.95, "grad_norm": 0.298828125, "learning_rate": 4.6017074413088515e-05, "loss": 2.254, "step": 7133 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.601592812212001e-05, "loss": 2.2577, "step": 7134 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.601478168050466e-05, "loss": 2.2753, "step": 7135 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.601363508825068e-05, "loss": 2.2472, "step": 7136 }, { "epoch": 0.95, "grad_norm": 0.330078125, "learning_rate": 4.60124883453663e-05, "loss": 2.2505, "step": 7137 }, { "epoch": 0.95, "grad_norm": 0.27734375, "learning_rate": 4.6011341451859724e-05, "loss": 2.2912, "step": 7138 }, { "epoch": 0.95, "grad_norm": 0.296875, "learning_rate": 4.601019440773919e-05, "loss": 2.2655, "step": 7139 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.6009047213012904e-05, "loss": 2.2828, "step": 7140 }, { "epoch": 0.95, "grad_norm": 0.287109375, "learning_rate": 4.600789986768911e-05, "loss": 2.2941, "step": 7141 }, { "epoch": 0.95, "grad_norm": 0.287109375, "learning_rate": 4.6006752371776005e-05, "loss": 2.2666, "step": 7142 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.600560472528184e-05, "loss": 2.2346, "step": 7143 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.6004456928214824e-05, "loss": 2.2666, "step": 7144 }, { "epoch": 0.95, "grad_norm": 0.30078125, "learning_rate": 4.600330898058319e-05, "loss": 2.2722, "step": 7145 }, { "epoch": 0.95, "grad_norm": 0.314453125, "learning_rate": 4.600216088239517e-05, "loss": 2.2926, "step": 7146 }, { "epoch": 0.95, "grad_norm": 0.298828125, "learning_rate": 4.6001012633659e-05, "loss": 2.2619, "step": 7147 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.59998642343829e-05, "loss": 2.2726, "step": 7148 }, { "epoch": 0.95, "grad_norm": 0.291015625, "learning_rate": 4.59987156845751e-05, "loss": 2.2575, "step": 7149 }, { "epoch": 0.95, "grad_norm": 0.30859375, "learning_rate": 4.599756698424384e-05, "loss": 2.2305, "step": 7150 }, { "epoch": 0.95, "grad_norm": 0.298828125, "learning_rate": 4.599641813339736e-05, "loss": 2.2856, "step": 7151 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.599526913204388e-05, "loss": 2.2492, "step": 7152 }, { "epoch": 0.95, "grad_norm": 0.302734375, "learning_rate": 4.599411998019165e-05, "loss": 2.2701, "step": 7153 }, { "epoch": 0.95, "grad_norm": 0.2890625, "learning_rate": 4.599297067784889e-05, "loss": 2.2832, "step": 7154 }, { "epoch": 0.95, "grad_norm": 0.275390625, "learning_rate": 4.599182122502386e-05, "loss": 2.2811, "step": 7155 }, { "epoch": 0.95, "grad_norm": 0.28125, "learning_rate": 4.599067162172478e-05, "loss": 2.2574, "step": 7156 }, { "epoch": 0.95, "grad_norm": 0.3046875, "learning_rate": 4.598952186795991e-05, "loss": 2.2644, "step": 7157 }, { "epoch": 0.95, "grad_norm": 0.291015625, "learning_rate": 4.5988371963737474e-05, "loss": 2.2498, "step": 7158 }, { "epoch": 0.95, "grad_norm": 0.283203125, "learning_rate": 4.598722190906573e-05, "loss": 2.2667, "step": 7159 }, { "epoch": 0.96, "grad_norm": 0.298828125, "learning_rate": 4.5986071703952905e-05, "loss": 2.2424, "step": 7160 }, { "epoch": 0.96, "grad_norm": 0.296875, "learning_rate": 4.598492134840725e-05, "loss": 2.2834, "step": 7161 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.5983770842437024e-05, "loss": 2.2749, "step": 7162 }, { "epoch": 0.96, "grad_norm": 0.296875, "learning_rate": 4.5982620186050466e-05, "loss": 2.2121, "step": 7163 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.598146937925581e-05, "loss": 2.2689, "step": 7164 }, { "epoch": 0.96, "grad_norm": 0.306640625, "learning_rate": 4.5980318422061325e-05, "loss": 2.2668, "step": 7165 }, { "epoch": 0.96, "grad_norm": 0.298828125, "learning_rate": 4.597916731447525e-05, "loss": 2.2578, "step": 7166 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.597801605650585e-05, "loss": 2.2636, "step": 7167 }, { "epoch": 0.96, "grad_norm": 0.27734375, "learning_rate": 4.597686464816135e-05, "loss": 2.2907, "step": 7168 }, { "epoch": 0.96, "grad_norm": 0.2890625, "learning_rate": 4.597571308945004e-05, "loss": 2.2402, "step": 7169 }, { "epoch": 0.96, "grad_norm": 0.306640625, "learning_rate": 4.597456138038014e-05, "loss": 2.2644, "step": 7170 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.597340952095994e-05, "loss": 2.224, "step": 7171 }, { "epoch": 0.96, "grad_norm": 0.306640625, "learning_rate": 4.597225751119766e-05, "loss": 2.2321, "step": 7172 }, { "epoch": 0.96, "grad_norm": 0.296875, "learning_rate": 4.5971105351101585e-05, "loss": 2.241, "step": 7173 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.596995304067997e-05, "loss": 2.2706, "step": 7174 }, { "epoch": 0.96, "grad_norm": 0.27734375, "learning_rate": 4.596880057994106e-05, "loss": 2.2749, "step": 7175 }, { "epoch": 0.96, "grad_norm": 0.283203125, "learning_rate": 4.5967647968893137e-05, "loss": 2.2679, "step": 7176 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.596649520754444e-05, "loss": 2.2707, "step": 7177 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.596534229590325e-05, "loss": 2.2452, "step": 7178 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 4.596418923397783e-05, "loss": 2.2198, "step": 7179 }, { "epoch": 0.96, "grad_norm": 0.30078125, "learning_rate": 4.596303602177644e-05, "loss": 2.2608, "step": 7180 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.596188265930734e-05, "loss": 2.2421, "step": 7181 }, { "epoch": 0.96, "grad_norm": 0.27734375, "learning_rate": 4.5960729146578816e-05, "loss": 2.2717, "step": 7182 }, { "epoch": 0.96, "grad_norm": 0.302734375, "learning_rate": 4.595957548359912e-05, "loss": 2.2662, "step": 7183 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.595842167037653e-05, "loss": 2.2251, "step": 7184 }, { "epoch": 0.96, "grad_norm": 0.28125, "learning_rate": 4.5957267706919316e-05, "loss": 2.2782, "step": 7185 }, { "epoch": 0.96, "grad_norm": 0.310546875, "learning_rate": 4.595611359323574e-05, "loss": 2.2221, "step": 7186 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 4.595495932933409e-05, "loss": 2.2451, "step": 7187 }, { "epoch": 0.96, "grad_norm": 0.306640625, "learning_rate": 4.5953804915222635e-05, "loss": 2.2727, "step": 7188 }, { "epoch": 0.96, "grad_norm": 0.30078125, "learning_rate": 4.595265035090965e-05, "loss": 2.2714, "step": 7189 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 4.595149563640341e-05, "loss": 2.2227, "step": 7190 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.5950340771712185e-05, "loss": 2.2527, "step": 7191 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.5949185756844256e-05, "loss": 2.3332, "step": 7192 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.5948030591807924e-05, "loss": 2.2609, "step": 7193 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 4.594687527661144e-05, "loss": 2.2581, "step": 7194 }, { "epoch": 0.96, "grad_norm": 0.296875, "learning_rate": 4.59457198112631e-05, "loss": 2.2598, "step": 7195 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.594456419577118e-05, "loss": 2.23, "step": 7196 }, { "epoch": 0.96, "grad_norm": 0.310546875, "learning_rate": 4.594340843014398e-05, "loss": 2.2729, "step": 7197 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 4.5942252514389764e-05, "loss": 2.2497, "step": 7198 }, { "epoch": 0.96, "grad_norm": 0.279296875, "learning_rate": 4.594109644851682e-05, "loss": 2.2314, "step": 7199 }, { "epoch": 0.96, "grad_norm": 0.2890625, "learning_rate": 4.5939940232533454e-05, "loss": 2.2722, "step": 7200 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.593878386644794e-05, "loss": 2.2412, "step": 7201 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.593762735026857e-05, "loss": 2.2395, "step": 7202 }, { "epoch": 0.96, "grad_norm": 0.302734375, "learning_rate": 4.5936470684003616e-05, "loss": 2.2409, "step": 7203 }, { "epoch": 0.96, "grad_norm": 0.3046875, "learning_rate": 4.593531386766141e-05, "loss": 2.275, "step": 7204 }, { "epoch": 0.96, "grad_norm": 0.30078125, "learning_rate": 4.59341569012502e-05, "loss": 2.2614, "step": 7205 }, { "epoch": 0.96, "grad_norm": 0.30078125, "learning_rate": 4.5932999784778315e-05, "loss": 2.2698, "step": 7206 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 4.593184251825403e-05, "loss": 2.3014, "step": 7207 }, { "epoch": 0.96, "grad_norm": 0.298828125, "learning_rate": 4.5930685101685644e-05, "loss": 2.279, "step": 7208 }, { "epoch": 0.96, "grad_norm": 0.302734375, "learning_rate": 4.592952753508145e-05, "loss": 2.2662, "step": 7209 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.5928369818449753e-05, "loss": 2.3025, "step": 7210 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 4.5927211951798854e-05, "loss": 2.2715, "step": 7211 }, { "epoch": 0.96, "grad_norm": 0.29296875, "learning_rate": 4.5926053935137044e-05, "loss": 2.2461, "step": 7212 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 4.5924895768472624e-05, "loss": 2.2763, "step": 7213 }, { "epoch": 0.96, "grad_norm": 0.3046875, "learning_rate": 4.592373745181391e-05, "loss": 2.2692, "step": 7214 }, { "epoch": 0.96, "grad_norm": 0.287109375, "learning_rate": 4.592257898516919e-05, "loss": 2.2854, "step": 7215 }, { "epoch": 0.96, "grad_norm": 0.31640625, "learning_rate": 4.5921420368546765e-05, "loss": 2.269, "step": 7216 }, { "epoch": 0.96, "grad_norm": 0.2890625, "learning_rate": 4.592026160195495e-05, "loss": 2.2604, "step": 7217 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.591910268540206e-05, "loss": 2.2578, "step": 7218 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.591794361889638e-05, "loss": 2.2561, "step": 7219 }, { "epoch": 0.96, "grad_norm": 0.279296875, "learning_rate": 4.5916784402446234e-05, "loss": 2.2161, "step": 7220 }, { "epoch": 0.96, "grad_norm": 0.283203125, "learning_rate": 4.591562503605993e-05, "loss": 2.2886, "step": 7221 }, { "epoch": 0.96, "grad_norm": 0.306640625, "learning_rate": 4.591446551974577e-05, "loss": 2.2816, "step": 7222 }, { "epoch": 0.96, "grad_norm": 0.291015625, "learning_rate": 4.591330585351209e-05, "loss": 2.2669, "step": 7223 }, { "epoch": 0.96, "grad_norm": 0.279296875, "learning_rate": 4.5912146037367166e-05, "loss": 2.2788, "step": 7224 }, { "epoch": 0.96, "grad_norm": 0.3046875, "learning_rate": 4.591098607131934e-05, "loss": 2.2614, "step": 7225 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.590982595537692e-05, "loss": 2.2864, "step": 7226 }, { "epoch": 0.96, "grad_norm": 0.298828125, "learning_rate": 4.5908665689548205e-05, "loss": 2.2487, "step": 7227 }, { "epoch": 0.96, "grad_norm": 0.28515625, "learning_rate": 4.590750527384154e-05, "loss": 2.2522, "step": 7228 }, { "epoch": 0.96, "grad_norm": 0.302734375, "learning_rate": 4.5906344708265225e-05, "loss": 2.3119, "step": 7229 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.590518399282758e-05, "loss": 2.2498, "step": 7230 }, { "epoch": 0.96, "grad_norm": 0.3203125, "learning_rate": 4.5904023127536934e-05, "loss": 2.2692, "step": 7231 }, { "epoch": 0.96, "grad_norm": 0.294921875, "learning_rate": 4.5902862112401604e-05, "loss": 2.2608, "step": 7232 }, { "epoch": 0.96, "grad_norm": 0.30859375, "learning_rate": 4.590170094742991e-05, "loss": 2.2708, "step": 7233 }, { "epoch": 0.96, "grad_norm": 0.296875, "learning_rate": 4.590053963263018e-05, "loss": 2.2553, "step": 7234 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.589937816801073e-05, "loss": 2.2881, "step": 7235 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.58982165535799e-05, "loss": 2.258, "step": 7236 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.5897054789346e-05, "loss": 2.2728, "step": 7237 }, { "epoch": 0.97, "grad_norm": 0.291015625, "learning_rate": 4.589589287531737e-05, "loss": 2.2476, "step": 7238 }, { "epoch": 0.97, "grad_norm": 0.31640625, "learning_rate": 4.5894730811502336e-05, "loss": 2.2613, "step": 7239 }, { "epoch": 0.97, "grad_norm": 0.310546875, "learning_rate": 4.589356859790922e-05, "loss": 2.2665, "step": 7240 }, { "epoch": 0.97, "grad_norm": 0.3203125, "learning_rate": 4.589240623454637e-05, "loss": 2.264, "step": 7241 }, { "epoch": 0.97, "grad_norm": 0.302734375, "learning_rate": 4.58912437214221e-05, "loss": 2.2632, "step": 7242 }, { "epoch": 0.97, "grad_norm": 0.296875, "learning_rate": 4.589008105854475e-05, "loss": 2.2658, "step": 7243 }, { "epoch": 0.97, "grad_norm": 0.30078125, "learning_rate": 4.588891824592266e-05, "loss": 2.2586, "step": 7244 }, { "epoch": 0.97, "grad_norm": 0.31640625, "learning_rate": 4.588775528356416e-05, "loss": 2.2304, "step": 7245 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.588659217147759e-05, "loss": 2.2908, "step": 7246 }, { "epoch": 0.97, "grad_norm": 0.3046875, "learning_rate": 4.588542890967128e-05, "loss": 2.2552, "step": 7247 }, { "epoch": 0.97, "grad_norm": 0.306640625, "learning_rate": 4.5884265498153576e-05, "loss": 2.2572, "step": 7248 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.5883101936932813e-05, "loss": 2.2699, "step": 7249 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.588193822601733e-05, "loss": 2.2308, "step": 7250 }, { "epoch": 0.97, "grad_norm": 0.27734375, "learning_rate": 4.588077436541548e-05, "loss": 2.2529, "step": 7251 }, { "epoch": 0.97, "grad_norm": 0.29296875, "learning_rate": 4.5879610355135586e-05, "loss": 2.2806, "step": 7252 }, { "epoch": 0.97, "grad_norm": 0.279296875, "learning_rate": 4.5878446195186014e-05, "loss": 2.27, "step": 7253 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.58772818855751e-05, "loss": 2.2584, "step": 7254 }, { "epoch": 0.97, "grad_norm": 0.30859375, "learning_rate": 4.587611742631118e-05, "loss": 2.3097, "step": 7255 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.587495281740261e-05, "loss": 2.2572, "step": 7256 }, { "epoch": 0.97, "grad_norm": 0.30859375, "learning_rate": 4.587378805885774e-05, "loss": 2.2616, "step": 7257 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.587262315068492e-05, "loss": 2.2688, "step": 7258 }, { "epoch": 0.97, "grad_norm": 0.29296875, "learning_rate": 4.587145809289249e-05, "loss": 2.2356, "step": 7259 }, { "epoch": 0.97, "grad_norm": 0.29296875, "learning_rate": 4.587029288548882e-05, "loss": 2.2765, "step": 7260 }, { "epoch": 0.97, "grad_norm": 0.275390625, "learning_rate": 4.5869127528482245e-05, "loss": 2.2682, "step": 7261 }, { "epoch": 0.97, "grad_norm": 0.279296875, "learning_rate": 4.586796202188112e-05, "loss": 2.2533, "step": 7262 }, { "epoch": 0.97, "grad_norm": 0.296875, "learning_rate": 4.586679636569381e-05, "loss": 2.3078, "step": 7263 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.586563055992867e-05, "loss": 2.3024, "step": 7264 }, { "epoch": 0.97, "grad_norm": 0.302734375, "learning_rate": 4.586446460459404e-05, "loss": 2.2206, "step": 7265 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.5863298499698305e-05, "loss": 2.2364, "step": 7266 }, { "epoch": 0.97, "grad_norm": 0.306640625, "learning_rate": 4.5862132245249794e-05, "loss": 2.287, "step": 7267 }, { "epoch": 0.97, "grad_norm": 0.294921875, "learning_rate": 4.5860965841256885e-05, "loss": 2.2647, "step": 7268 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.5859799287727946e-05, "loss": 2.2771, "step": 7269 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.585863258467132e-05, "loss": 2.258, "step": 7270 }, { "epoch": 0.97, "grad_norm": 0.296875, "learning_rate": 4.585746573209538e-05, "loss": 2.2723, "step": 7271 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.5856298730008486e-05, "loss": 2.277, "step": 7272 }, { "epoch": 0.97, "grad_norm": 0.2734375, "learning_rate": 4.5855131578419005e-05, "loss": 2.2881, "step": 7273 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.585396427733532e-05, "loss": 2.2336, "step": 7274 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.5852796826765764e-05, "loss": 2.2621, "step": 7275 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.585162922671874e-05, "loss": 2.2325, "step": 7276 }, { "epoch": 0.97, "grad_norm": 0.302734375, "learning_rate": 4.585046147720259e-05, "loss": 2.2541, "step": 7277 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.58492935782257e-05, "loss": 2.2719, "step": 7278 }, { "epoch": 0.97, "grad_norm": 0.3046875, "learning_rate": 4.584812552979645e-05, "loss": 2.2833, "step": 7279 }, { "epoch": 0.97, "grad_norm": 0.291015625, "learning_rate": 4.5846957331923195e-05, "loss": 2.2962, "step": 7280 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.5845788984614315e-05, "loss": 2.2636, "step": 7281 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.584462048787819e-05, "loss": 2.2358, "step": 7282 }, { "epoch": 0.97, "grad_norm": 0.291015625, "learning_rate": 4.5843451841723184e-05, "loss": 2.2978, "step": 7283 }, { "epoch": 0.97, "grad_norm": 0.302734375, "learning_rate": 4.5842283046157685e-05, "loss": 2.2562, "step": 7284 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.584111410119007e-05, "loss": 2.2436, "step": 7285 }, { "epoch": 0.97, "grad_norm": 0.275390625, "learning_rate": 4.583994500682871e-05, "loss": 2.2358, "step": 7286 }, { "epoch": 0.97, "grad_norm": 0.28515625, "learning_rate": 4.5838775763082e-05, "loss": 2.2806, "step": 7287 }, { "epoch": 0.97, "grad_norm": 0.291015625, "learning_rate": 4.583760636995831e-05, "loss": 2.2391, "step": 7288 }, { "epoch": 0.97, "grad_norm": 0.28125, "learning_rate": 4.5836436827466024e-05, "loss": 2.2525, "step": 7289 }, { "epoch": 0.97, "grad_norm": 0.27734375, "learning_rate": 4.583526713561353e-05, "loss": 2.2802, "step": 7290 }, { "epoch": 0.97, "grad_norm": 0.28515625, "learning_rate": 4.5834097294409205e-05, "loss": 2.2494, "step": 7291 }, { "epoch": 0.97, "grad_norm": 0.28515625, "learning_rate": 4.583292730386144e-05, "loss": 2.2651, "step": 7292 }, { "epoch": 0.97, "grad_norm": 0.28515625, "learning_rate": 4.5831757163978626e-05, "loss": 2.2285, "step": 7293 }, { "epoch": 0.97, "grad_norm": 0.298828125, "learning_rate": 4.5830586874769146e-05, "loss": 2.2685, "step": 7294 }, { "epoch": 0.97, "grad_norm": 0.306640625, "learning_rate": 4.582941643624139e-05, "loss": 2.2472, "step": 7295 }, { "epoch": 0.97, "grad_norm": 0.27734375, "learning_rate": 4.582824584840374e-05, "loss": 2.2529, "step": 7296 }, { "epoch": 0.97, "grad_norm": 0.275390625, "learning_rate": 4.582707511126459e-05, "loss": 2.2622, "step": 7297 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.582590422483235e-05, "loss": 2.2494, "step": 7298 }, { "epoch": 0.97, "grad_norm": 0.302734375, "learning_rate": 4.5824733189115385e-05, "loss": 2.2626, "step": 7299 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.582356200412211e-05, "loss": 2.2723, "step": 7300 }, { "epoch": 0.97, "grad_norm": 0.306640625, "learning_rate": 4.582239066986091e-05, "loss": 2.2532, "step": 7301 }, { "epoch": 0.97, "grad_norm": 0.291015625, "learning_rate": 4.58212191863402e-05, "loss": 2.2638, "step": 7302 }, { "epoch": 0.97, "grad_norm": 0.3046875, "learning_rate": 4.582004755356835e-05, "loss": 2.2571, "step": 7303 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.581887577155377e-05, "loss": 2.2802, "step": 7304 }, { "epoch": 0.97, "grad_norm": 0.310546875, "learning_rate": 4.581770384030486e-05, "loss": 2.2459, "step": 7305 }, { "epoch": 0.97, "grad_norm": 0.3125, "learning_rate": 4.581653175983003e-05, "loss": 2.2542, "step": 7306 }, { "epoch": 0.97, "grad_norm": 0.287109375, "learning_rate": 4.5815359530137664e-05, "loss": 2.2634, "step": 7307 }, { "epoch": 0.97, "grad_norm": 0.2890625, "learning_rate": 4.581418715123618e-05, "loss": 2.243, "step": 7308 }, { "epoch": 0.97, "grad_norm": 0.283203125, "learning_rate": 4.581301462313397e-05, "loss": 2.2486, "step": 7309 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.581184194583945e-05, "loss": 2.2039, "step": 7310 }, { "epoch": 0.98, "grad_norm": 0.271484375, "learning_rate": 4.581066911936102e-05, "loss": 2.243, "step": 7311 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.580949614370708e-05, "loss": 2.265, "step": 7312 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.580832301888606e-05, "loss": 2.2681, "step": 7313 }, { "epoch": 0.98, "grad_norm": 0.2890625, "learning_rate": 4.580714974490634e-05, "loss": 2.2544, "step": 7314 }, { "epoch": 0.98, "grad_norm": 0.314453125, "learning_rate": 4.5805976321776354e-05, "loss": 2.2213, "step": 7315 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.58048027495045e-05, "loss": 2.2814, "step": 7316 }, { "epoch": 0.98, "grad_norm": 0.27734375, "learning_rate": 4.5803629028099196e-05, "loss": 2.2629, "step": 7317 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.5802455157568854e-05, "loss": 2.2624, "step": 7318 }, { "epoch": 0.98, "grad_norm": 0.2890625, "learning_rate": 4.580128113792189e-05, "loss": 2.2542, "step": 7319 }, { "epoch": 0.98, "grad_norm": 0.310546875, "learning_rate": 4.580010696916672e-05, "loss": 2.2426, "step": 7320 }, { "epoch": 0.98, "grad_norm": 0.3125, "learning_rate": 4.579893265131177e-05, "loss": 2.2787, "step": 7321 }, { "epoch": 0.98, "grad_norm": 0.302734375, "learning_rate": 4.579775818436544e-05, "loss": 2.2534, "step": 7322 }, { "epoch": 0.98, "grad_norm": 0.296875, "learning_rate": 4.579658356833615e-05, "loss": 2.267, "step": 7323 }, { "epoch": 0.98, "grad_norm": 0.302734375, "learning_rate": 4.5795408803232325e-05, "loss": 2.2533, "step": 7324 }, { "epoch": 0.98, "grad_norm": 0.31640625, "learning_rate": 4.579423388906239e-05, "loss": 2.2448, "step": 7325 }, { "epoch": 0.98, "grad_norm": 0.2734375, "learning_rate": 4.579305882583477e-05, "loss": 2.2508, "step": 7326 }, { "epoch": 0.98, "grad_norm": 0.302734375, "learning_rate": 4.5791883613557875e-05, "loss": 2.2902, "step": 7327 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.579070825224015e-05, "loss": 2.2321, "step": 7328 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.578953274188998e-05, "loss": 2.2552, "step": 7329 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.578835708251584e-05, "loss": 2.2561, "step": 7330 }, { "epoch": 0.98, "grad_norm": 0.2734375, "learning_rate": 4.5787181274126136e-05, "loss": 2.2195, "step": 7331 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.578600531672929e-05, "loss": 2.2669, "step": 7332 }, { "epoch": 0.98, "grad_norm": 0.2890625, "learning_rate": 4.578482921033373e-05, "loss": 2.2397, "step": 7333 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.5783652954947895e-05, "loss": 2.2581, "step": 7334 }, { "epoch": 0.98, "grad_norm": 0.296875, "learning_rate": 4.578247655058022e-05, "loss": 2.2729, "step": 7335 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.578129999723914e-05, "loss": 2.2891, "step": 7336 }, { "epoch": 0.98, "grad_norm": 0.294921875, "learning_rate": 4.578012329493307e-05, "loss": 2.2427, "step": 7337 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.5778946443670466e-05, "loss": 2.2679, "step": 7338 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.577776944345976e-05, "loss": 2.2477, "step": 7339 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.577659229430937e-05, "loss": 2.2771, "step": 7340 }, { "epoch": 0.98, "grad_norm": 0.296875, "learning_rate": 4.577541499622776e-05, "loss": 2.2645, "step": 7341 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.577423754922335e-05, "loss": 2.256, "step": 7342 }, { "epoch": 0.98, "grad_norm": 0.2890625, "learning_rate": 4.5773059953304586e-05, "loss": 2.2636, "step": 7343 }, { "epoch": 0.98, "grad_norm": 0.310546875, "learning_rate": 4.577188220847991e-05, "loss": 2.2633, "step": 7344 }, { "epoch": 0.98, "grad_norm": 0.30078125, "learning_rate": 4.577070431475777e-05, "loss": 2.2736, "step": 7345 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.5769526272146604e-05, "loss": 2.2672, "step": 7346 }, { "epoch": 0.98, "grad_norm": 0.28125, "learning_rate": 4.5768348080654855e-05, "loss": 2.2344, "step": 7347 }, { "epoch": 0.98, "grad_norm": 0.28125, "learning_rate": 4.576716974029097e-05, "loss": 2.2569, "step": 7348 }, { "epoch": 0.98, "grad_norm": 0.296875, "learning_rate": 4.57659912510634e-05, "loss": 2.2473, "step": 7349 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.576481261298058e-05, "loss": 2.2937, "step": 7350 }, { "epoch": 0.98, "grad_norm": 0.294921875, "learning_rate": 4.576363382605097e-05, "loss": 2.2631, "step": 7351 }, { "epoch": 0.98, "grad_norm": 0.298828125, "learning_rate": 4.576245489028303e-05, "loss": 2.2824, "step": 7352 }, { "epoch": 0.98, "grad_norm": 0.28125, "learning_rate": 4.576127580568518e-05, "loss": 2.2838, "step": 7353 }, { "epoch": 0.98, "grad_norm": 0.279296875, "learning_rate": 4.57600965722659e-05, "loss": 2.2506, "step": 7354 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.575891719003363e-05, "loss": 2.2976, "step": 7355 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.575773765899682e-05, "loss": 2.2953, "step": 7356 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.575655797916395e-05, "loss": 2.2315, "step": 7357 }, { "epoch": 0.98, "grad_norm": 0.275390625, "learning_rate": 4.575537815054344e-05, "loss": 2.2638, "step": 7358 }, { "epoch": 0.98, "grad_norm": 0.296875, "learning_rate": 4.575419817314378e-05, "loss": 2.2792, "step": 7359 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.575301804697341e-05, "loss": 2.2397, "step": 7360 }, { "epoch": 0.98, "grad_norm": 0.275390625, "learning_rate": 4.575183777204078e-05, "loss": 2.2323, "step": 7361 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.575065734835438e-05, "loss": 2.2692, "step": 7362 }, { "epoch": 0.98, "grad_norm": 0.27734375, "learning_rate": 4.574947677592265e-05, "loss": 2.2872, "step": 7363 }, { "epoch": 0.98, "grad_norm": 0.27734375, "learning_rate": 4.574829605475406e-05, "loss": 2.2293, "step": 7364 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.574711518485707e-05, "loss": 2.2504, "step": 7365 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.574593416624015e-05, "loss": 2.2824, "step": 7366 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.574475299891175e-05, "loss": 2.2348, "step": 7367 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.574357168288036e-05, "loss": 2.3036, "step": 7368 }, { "epoch": 0.98, "grad_norm": 0.31640625, "learning_rate": 4.5742390218154444e-05, "loss": 2.2333, "step": 7369 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.574120860474245e-05, "loss": 2.2752, "step": 7370 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.574002684265287e-05, "loss": 2.252, "step": 7371 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.5738844931894166e-05, "loss": 2.2707, "step": 7372 }, { "epoch": 0.98, "grad_norm": 0.2890625, "learning_rate": 4.5737662872474805e-05, "loss": 2.27, "step": 7373 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.573648066440327e-05, "loss": 2.2442, "step": 7374 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.5735298307688035e-05, "loss": 2.2863, "step": 7375 }, { "epoch": 0.98, "grad_norm": 0.287109375, "learning_rate": 4.5734115802337574e-05, "loss": 2.2761, "step": 7376 }, { "epoch": 0.98, "grad_norm": 0.3046875, "learning_rate": 4.5732933148360356e-05, "loss": 2.2441, "step": 7377 }, { "epoch": 0.98, "grad_norm": 0.291015625, "learning_rate": 4.5731750345764864e-05, "loss": 2.305, "step": 7378 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.5730567394559584e-05, "loss": 2.2717, "step": 7379 }, { "epoch": 0.98, "grad_norm": 0.29296875, "learning_rate": 4.5729384294752984e-05, "loss": 2.2677, "step": 7380 }, { "epoch": 0.98, "grad_norm": 0.310546875, "learning_rate": 4.5728201046353556e-05, "loss": 2.2575, "step": 7381 }, { "epoch": 0.98, "grad_norm": 0.28515625, "learning_rate": 4.572701764936976e-05, "loss": 2.2404, "step": 7382 }, { "epoch": 0.98, "grad_norm": 0.318359375, "learning_rate": 4.572583410381011e-05, "loss": 2.2575, "step": 7383 }, { "epoch": 0.98, "grad_norm": 0.30078125, "learning_rate": 4.5724650409683066e-05, "loss": 2.2583, "step": 7384 }, { "epoch": 0.99, "grad_norm": 0.298828125, "learning_rate": 4.572346656699713e-05, "loss": 2.2668, "step": 7385 }, { "epoch": 0.99, "grad_norm": 0.310546875, "learning_rate": 4.5722282575760764e-05, "loss": 2.2663, "step": 7386 }, { "epoch": 0.99, "grad_norm": 0.28515625, "learning_rate": 4.572109843598248e-05, "loss": 2.2465, "step": 7387 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5719914147670756e-05, "loss": 2.2444, "step": 7388 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.571872971083408e-05, "loss": 2.2436, "step": 7389 }, { "epoch": 0.99, "grad_norm": 0.28515625, "learning_rate": 4.5717545125480945e-05, "loss": 2.2938, "step": 7390 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.571636039161984e-05, "loss": 2.2658, "step": 7391 }, { "epoch": 0.99, "grad_norm": 0.310546875, "learning_rate": 4.571517550925926e-05, "loss": 2.2397, "step": 7392 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5713990478407694e-05, "loss": 2.2794, "step": 7393 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.571280529907363e-05, "loss": 2.2569, "step": 7394 }, { "epoch": 0.99, "grad_norm": 0.31640625, "learning_rate": 4.571161997126559e-05, "loss": 2.2779, "step": 7395 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.571043449499205e-05, "loss": 2.257, "step": 7396 }, { "epoch": 0.99, "grad_norm": 0.287109375, "learning_rate": 4.570924887026151e-05, "loss": 2.2844, "step": 7397 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.5708063097082474e-05, "loss": 2.2593, "step": 7398 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5706877175463436e-05, "loss": 2.2564, "step": 7399 }, { "epoch": 0.99, "grad_norm": 0.306640625, "learning_rate": 4.57056911054129e-05, "loss": 2.2958, "step": 7400 }, { "epoch": 0.99, "grad_norm": 0.30859375, "learning_rate": 4.570450488693936e-05, "loss": 2.2669, "step": 7401 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.570331852005134e-05, "loss": 2.2577, "step": 7402 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.5702132004757315e-05, "loss": 2.2381, "step": 7403 }, { "epoch": 0.99, "grad_norm": 0.3125, "learning_rate": 4.570094534106582e-05, "loss": 2.2693, "step": 7404 }, { "epoch": 0.99, "grad_norm": 0.27734375, "learning_rate": 4.5699758528985334e-05, "loss": 2.2652, "step": 7405 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.56985715685244e-05, "loss": 2.2955, "step": 7406 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5697384459691486e-05, "loss": 2.2772, "step": 7407 }, { "epoch": 0.99, "grad_norm": 0.318359375, "learning_rate": 4.5696197202495115e-05, "loss": 2.2273, "step": 7408 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.569500979694381e-05, "loss": 2.3021, "step": 7409 }, { "epoch": 0.99, "grad_norm": 0.298828125, "learning_rate": 4.5693822243046075e-05, "loss": 2.2163, "step": 7410 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.569263454081042e-05, "loss": 2.273, "step": 7411 }, { "epoch": 0.99, "grad_norm": 0.3125, "learning_rate": 4.569144669024536e-05, "loss": 2.2768, "step": 7412 }, { "epoch": 0.99, "grad_norm": 0.287109375, "learning_rate": 4.569025869135941e-05, "loss": 2.2644, "step": 7413 }, { "epoch": 0.99, "grad_norm": 0.314453125, "learning_rate": 4.568907054416109e-05, "loss": 2.2875, "step": 7414 }, { "epoch": 0.99, "grad_norm": 0.30859375, "learning_rate": 4.5687882248658906e-05, "loss": 2.2622, "step": 7415 }, { "epoch": 0.99, "grad_norm": 0.2734375, "learning_rate": 4.568669380486139e-05, "loss": 2.2809, "step": 7416 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.568550521277706e-05, "loss": 2.2569, "step": 7417 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.568431647241442e-05, "loss": 2.2686, "step": 7418 }, { "epoch": 0.99, "grad_norm": 0.298828125, "learning_rate": 4.5683127583782e-05, "loss": 2.245, "step": 7419 }, { "epoch": 0.99, "grad_norm": 0.283203125, "learning_rate": 4.568193854688834e-05, "loss": 2.2971, "step": 7420 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.568074936174194e-05, "loss": 2.236, "step": 7421 }, { "epoch": 0.99, "grad_norm": 0.287109375, "learning_rate": 4.567956002835132e-05, "loss": 2.2545, "step": 7422 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.567837054672502e-05, "loss": 2.2746, "step": 7423 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.5677180916871575e-05, "loss": 2.2782, "step": 7424 }, { "epoch": 0.99, "grad_norm": 0.3125, "learning_rate": 4.567599113879949e-05, "loss": 2.2639, "step": 7425 }, { "epoch": 0.99, "grad_norm": 0.2890625, "learning_rate": 4.567480121251731e-05, "loss": 2.2527, "step": 7426 }, { "epoch": 0.99, "grad_norm": 0.2890625, "learning_rate": 4.567361113803356e-05, "loss": 2.3113, "step": 7427 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.567242091535676e-05, "loss": 2.2669, "step": 7428 }, { "epoch": 0.99, "grad_norm": 0.283203125, "learning_rate": 4.5671230544495465e-05, "loss": 2.2403, "step": 7429 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.567004002545818e-05, "loss": 2.292, "step": 7430 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5668849358253466e-05, "loss": 2.2761, "step": 7431 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.566765854288984e-05, "loss": 2.2807, "step": 7432 }, { "epoch": 0.99, "grad_norm": 0.28515625, "learning_rate": 4.5666467579375845e-05, "loss": 2.2491, "step": 7433 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.566527646772001e-05, "loss": 2.2409, "step": 7434 }, { "epoch": 0.99, "grad_norm": 0.30078125, "learning_rate": 4.566408520793089e-05, "loss": 2.2377, "step": 7435 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5662893800017e-05, "loss": 2.2961, "step": 7436 }, { "epoch": 0.99, "grad_norm": 0.294921875, "learning_rate": 4.566170224398691e-05, "loss": 2.2744, "step": 7437 }, { "epoch": 0.99, "grad_norm": 0.279296875, "learning_rate": 4.566051053984914e-05, "loss": 2.2796, "step": 7438 }, { "epoch": 0.99, "grad_norm": 0.2890625, "learning_rate": 4.565931868761223e-05, "loss": 2.2422, "step": 7439 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.565812668728474e-05, "loss": 2.2637, "step": 7440 }, { "epoch": 0.99, "grad_norm": 0.30859375, "learning_rate": 4.56569345388752e-05, "loss": 2.2437, "step": 7441 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.5655742242392165e-05, "loss": 2.2577, "step": 7442 }, { "epoch": 0.99, "grad_norm": 0.287109375, "learning_rate": 4.565454979784418e-05, "loss": 2.2291, "step": 7443 }, { "epoch": 0.99, "grad_norm": 0.287109375, "learning_rate": 4.565335720523979e-05, "loss": 2.2634, "step": 7444 }, { "epoch": 0.99, "grad_norm": 0.30859375, "learning_rate": 4.565216446458754e-05, "loss": 2.224, "step": 7445 }, { "epoch": 0.99, "grad_norm": 0.283203125, "learning_rate": 4.565097157589599e-05, "loss": 2.2771, "step": 7446 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.5649778539173684e-05, "loss": 2.2705, "step": 7447 }, { "epoch": 0.99, "grad_norm": 0.279296875, "learning_rate": 4.564858535442916e-05, "loss": 2.2733, "step": 7448 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.564739202167101e-05, "loss": 2.236, "step": 7449 }, { "epoch": 0.99, "grad_norm": 0.302734375, "learning_rate": 4.564619854090775e-05, "loss": 2.2547, "step": 7450 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.5645004912147946e-05, "loss": 2.2894, "step": 7451 }, { "epoch": 0.99, "grad_norm": 0.296875, "learning_rate": 4.564381113540017e-05, "loss": 2.2676, "step": 7452 }, { "epoch": 0.99, "grad_norm": 0.2890625, "learning_rate": 4.564261721067296e-05, "loss": 2.2419, "step": 7453 }, { "epoch": 0.99, "grad_norm": 0.29296875, "learning_rate": 4.564142313797488e-05, "loss": 2.2415, "step": 7454 }, { "epoch": 0.99, "grad_norm": 0.3046875, "learning_rate": 4.56402289173145e-05, "loss": 2.2621, "step": 7455 }, { "epoch": 0.99, "grad_norm": 0.30859375, "learning_rate": 4.563903454870036e-05, "loss": 2.2392, "step": 7456 }, { "epoch": 0.99, "grad_norm": 0.279296875, "learning_rate": 4.563784003214103e-05, "loss": 2.2696, "step": 7457 }, { "epoch": 0.99, "grad_norm": 0.30078125, "learning_rate": 4.563664536764509e-05, "loss": 2.2576, "step": 7458 }, { "epoch": 0.99, "grad_norm": 0.291015625, "learning_rate": 4.563545055522108e-05, "loss": 2.2397, "step": 7459 }, { "epoch": 1.0, "grad_norm": 0.30078125, "learning_rate": 4.5634255594877576e-05, "loss": 2.2588, "step": 7460 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.563306048662314e-05, "loss": 2.2719, "step": 7461 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.563186523046633e-05, "loss": 2.2906, "step": 7462 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.563066982641574e-05, "loss": 2.2573, "step": 7463 }, { "epoch": 1.0, "grad_norm": 0.302734375, "learning_rate": 4.562947427447991e-05, "loss": 2.2646, "step": 7464 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.562827857466743e-05, "loss": 2.2693, "step": 7465 }, { "epoch": 1.0, "grad_norm": 0.291015625, "learning_rate": 4.562708272698686e-05, "loss": 2.2812, "step": 7466 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.562588673144678e-05, "loss": 2.2517, "step": 7467 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.562469058805575e-05, "loss": 2.2563, "step": 7468 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.562349429682236e-05, "loss": 2.2578, "step": 7469 }, { "epoch": 1.0, "grad_norm": 0.298828125, "learning_rate": 4.5622297857755175e-05, "loss": 2.2684, "step": 7470 }, { "epoch": 1.0, "grad_norm": 0.287109375, "learning_rate": 4.562110127086278e-05, "loss": 2.2471, "step": 7471 }, { "epoch": 1.0, "grad_norm": 0.326171875, "learning_rate": 4.561990453615374e-05, "loss": 2.2679, "step": 7472 }, { "epoch": 1.0, "grad_norm": 0.2734375, "learning_rate": 4.5618707653636647e-05, "loss": 2.2565, "step": 7473 }, { "epoch": 1.0, "grad_norm": 0.283203125, "learning_rate": 4.561751062332007e-05, "loss": 2.267, "step": 7474 }, { "epoch": 1.0, "grad_norm": 0.28125, "learning_rate": 4.56163134452126e-05, "loss": 2.2191, "step": 7475 }, { "epoch": 1.0, "grad_norm": 0.283203125, "learning_rate": 4.561511611932281e-05, "loss": 2.2676, "step": 7476 }, { "epoch": 1.0, "grad_norm": 0.314453125, "learning_rate": 4.561391864565928e-05, "loss": 2.2638, "step": 7477 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.56127210242306e-05, "loss": 2.2277, "step": 7478 }, { "epoch": 1.0, "grad_norm": 0.3046875, "learning_rate": 4.561152325504536e-05, "loss": 2.2585, "step": 7479 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.5610325338112135e-05, "loss": 2.2603, "step": 7480 }, { "epoch": 1.0, "grad_norm": 0.291015625, "learning_rate": 4.560912727343951e-05, "loss": 2.2227, "step": 7481 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.560792906103609e-05, "loss": 2.2906, "step": 7482 }, { "epoch": 1.0, "grad_norm": 0.3046875, "learning_rate": 4.560673070091045e-05, "loss": 2.2611, "step": 7483 }, { "epoch": 1.0, "grad_norm": 0.310546875, "learning_rate": 4.560553219307119e-05, "loss": 2.2335, "step": 7484 }, { "epoch": 1.0, "grad_norm": 0.28125, "learning_rate": 4.560433353752688e-05, "loss": 2.2807, "step": 7485 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.560313473428614e-05, "loss": 2.2587, "step": 7486 }, { "epoch": 1.0, "grad_norm": 0.275390625, "learning_rate": 4.560193578335754e-05, "loss": 2.2593, "step": 7487 }, { "epoch": 1.0, "grad_norm": 0.287109375, "learning_rate": 4.56007366847497e-05, "loss": 2.2612, "step": 7488 }, { "epoch": 1.0, "grad_norm": 0.29296875, "learning_rate": 4.559953743847118e-05, "loss": 2.2735, "step": 7489 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.5598338044530607e-05, "loss": 2.229, "step": 7490 }, { "epoch": 1.0, "grad_norm": 0.275390625, "learning_rate": 4.5597138502936575e-05, "loss": 2.2705, "step": 7491 }, { "epoch": 1.0, "grad_norm": 0.283203125, "learning_rate": 4.5595938813697666e-05, "loss": 2.2402, "step": 7492 }, { "epoch": 1.0, "grad_norm": 0.291015625, "learning_rate": 4.5594738976822494e-05, "loss": 2.2778, "step": 7493 }, { "epoch": 1.0, "grad_norm": 0.275390625, "learning_rate": 4.559353899231965e-05, "loss": 2.2342, "step": 7494 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.559233886019775e-05, "loss": 2.2938, "step": 7495 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.559113858046538e-05, "loss": 2.256, "step": 7496 }, { "epoch": 1.0, "grad_norm": 0.328125, "learning_rate": 4.558993815313115e-05, "loss": 2.2484, "step": 7497 }, { "epoch": 1.0, "grad_norm": 0.29296875, "learning_rate": 4.5588737578203664e-05, "loss": 2.2544, "step": 7498 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.558753685569154e-05, "loss": 2.2754, "step": 7499 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.558633598560337e-05, "loss": 2.229, "step": 7500 }, { "epoch": 1.0, "grad_norm": 0.29296875, "learning_rate": 4.558513496794776e-05, "loss": 2.2704, "step": 7501 }, { "epoch": 1.0, "grad_norm": 0.3125, "learning_rate": 4.5583933802733334e-05, "loss": 2.2606, "step": 7502 }, { "epoch": 1.0, "grad_norm": 0.29296875, "learning_rate": 4.55827324899687e-05, "loss": 2.2145, "step": 7503 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.558153102966246e-05, "loss": 2.2533, "step": 7504 }, { "epoch": 1.0, "grad_norm": 0.279296875, "learning_rate": 4.558032942182323e-05, "loss": 2.2508, "step": 7505 }, { "epoch": 1.0, "grad_norm": 0.30078125, "learning_rate": 4.5579127666459624e-05, "loss": 2.2633, "step": 7506 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.557792576358026e-05, "loss": 2.2247, "step": 7507 }, { "epoch": 1.0, "grad_norm": 0.302734375, "learning_rate": 4.557672371319375e-05, "loss": 2.2485, "step": 7508 }, { "epoch": 1.0, "grad_norm": 0.294921875, "learning_rate": 4.5575521515308704e-05, "loss": 2.2532, "step": 7509 }, { "epoch": 1.0, "grad_norm": 0.291015625, "learning_rate": 4.557431916993374e-05, "loss": 2.277, "step": 7510 }, { "epoch": 1.0, "grad_norm": 0.298828125, "learning_rate": 4.55731166770775e-05, "loss": 2.2386, "step": 7511 }, { "epoch": 1.0, "grad_norm": 0.30078125, "learning_rate": 4.557191403674857e-05, "loss": 2.2792, "step": 7512 }, { "epoch": 1.0, "grad_norm": 0.302734375, "learning_rate": 4.55707112489556e-05, "loss": 2.2653, "step": 7513 }, { "epoch": 1.0, "grad_norm": 0.28515625, "learning_rate": 4.556950831370719e-05, "loss": 2.3123, "step": 7514 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.5568305231011976e-05, "loss": 2.2358, "step": 7515 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.556710200087858e-05, "loss": 2.2787, "step": 7516 }, { "epoch": 1.0, "grad_norm": 0.29296875, "learning_rate": 4.556589862331562e-05, "loss": 2.2465, "step": 7517 }, { "epoch": 1.0, "grad_norm": 0.2890625, "learning_rate": 4.556469509833173e-05, "loss": 2.3059, "step": 7518 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.556349142593554e-05, "loss": 2.2702, "step": 7519 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.5562287606135666e-05, "loss": 2.2564, "step": 7520 }, { "epoch": 1.0, "grad_norm": 0.2890625, "learning_rate": 4.5561083638940744e-05, "loss": 2.2421, "step": 7521 }, { "epoch": 1.0, "grad_norm": 0.283203125, "learning_rate": 4.5559879524359405e-05, "loss": 2.2579, "step": 7522 }, { "epoch": 1.0, "grad_norm": 0.30078125, "learning_rate": 4.5558675262400276e-05, "loss": 2.261, "step": 7523 }, { "epoch": 1.0, "grad_norm": 0.306640625, "learning_rate": 4.555747085307199e-05, "loss": 2.2673, "step": 7524 }, { "epoch": 1.0, "grad_norm": 0.2890625, "learning_rate": 4.5556266296383194e-05, "loss": 2.2612, "step": 7525 }, { "epoch": 1.0, "grad_norm": 0.287109375, "learning_rate": 4.555506159234251e-05, "loss": 2.2823, "step": 7526 }, { "epoch": 1.0, "grad_norm": 0.2890625, "learning_rate": 4.555385674095857e-05, "loss": 2.2632, "step": 7527 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.555265174224002e-05, "loss": 2.2357, "step": 7528 }, { "epoch": 1.0, "grad_norm": 0.28125, "learning_rate": 4.555144659619548e-05, "loss": 2.238, "step": 7529 }, { "epoch": 1.0, "grad_norm": 0.291015625, "learning_rate": 4.555024130283362e-05, "loss": 2.3138, "step": 7530 }, { "epoch": 1.0, "grad_norm": 0.28125, "learning_rate": 4.5549035862163046e-05, "loss": 2.2644, "step": 7531 }, { "epoch": 1.0, "grad_norm": 0.296875, "learning_rate": 4.5547830274192425e-05, "loss": 2.2486, "step": 7532 }, { "epoch": 1.0, "grad_norm": 0.287109375, "learning_rate": 4.554662453893039e-05, "loss": 2.2455, "step": 7533 }, { "epoch": 1.0, "grad_norm": 0.306640625, "learning_rate": 4.5545418656385586e-05, "loss": 2.2376, "step": 7534 }, { "epoch": 1.01, "grad_norm": 0.283203125, "learning_rate": 4.5544212626566646e-05, "loss": 2.2498, "step": 7535 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.5543006449482225e-05, "loss": 2.2378, "step": 7536 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.554180012514097e-05, "loss": 2.2704, "step": 7537 }, { "epoch": 1.01, "grad_norm": 0.30859375, "learning_rate": 4.554059365355152e-05, "loss": 2.2622, "step": 7538 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.5539387034722534e-05, "loss": 2.2615, "step": 7539 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.553818026866266e-05, "loss": 2.2759, "step": 7540 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.5536973355380534e-05, "loss": 2.2743, "step": 7541 }, { "epoch": 1.01, "grad_norm": 0.294921875, "learning_rate": 4.553576629488482e-05, "loss": 2.2769, "step": 7542 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.553455908718418e-05, "loss": 2.2955, "step": 7543 }, { "epoch": 1.01, "grad_norm": 0.27734375, "learning_rate": 4.553335173228724e-05, "loss": 2.2913, "step": 7544 }, { "epoch": 1.01, "grad_norm": 0.291015625, "learning_rate": 4.5532144230202675e-05, "loss": 2.2799, "step": 7545 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.5530936580939146e-05, "loss": 2.24, "step": 7546 }, { "epoch": 1.01, "grad_norm": 0.30078125, "learning_rate": 4.552972878450529e-05, "loss": 2.2637, "step": 7547 }, { "epoch": 1.01, "grad_norm": 0.294921875, "learning_rate": 4.552852084090978e-05, "loss": 2.272, "step": 7548 }, { "epoch": 1.01, "grad_norm": 0.3125, "learning_rate": 4.5527312750161265e-05, "loss": 2.2611, "step": 7549 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.5526104512268405e-05, "loss": 2.2546, "step": 7550 }, { "epoch": 1.01, "grad_norm": 0.322265625, "learning_rate": 4.552489612723987e-05, "loss": 2.3049, "step": 7551 }, { "epoch": 1.01, "grad_norm": 0.3046875, "learning_rate": 4.552368759508432e-05, "loss": 2.2652, "step": 7552 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.552247891581041e-05, "loss": 2.2512, "step": 7553 }, { "epoch": 1.01, "grad_norm": 0.31640625, "learning_rate": 4.5521270089426817e-05, "loss": 2.2706, "step": 7554 }, { "epoch": 1.01, "grad_norm": 0.3125, "learning_rate": 4.552006111594219e-05, "loss": 2.2701, "step": 7555 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.55188519953652e-05, "loss": 2.2504, "step": 7556 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.5517642727704525e-05, "loss": 2.2755, "step": 7557 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.551643331296883e-05, "loss": 2.2321, "step": 7558 }, { "epoch": 1.01, "grad_norm": 0.291015625, "learning_rate": 4.551522375116677e-05, "loss": 2.279, "step": 7559 }, { "epoch": 1.01, "grad_norm": 0.298828125, "learning_rate": 4.551401404230703e-05, "loss": 2.2422, "step": 7560 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.551280418639828e-05, "loss": 2.2738, "step": 7561 }, { "epoch": 1.01, "grad_norm": 0.326171875, "learning_rate": 4.551159418344919e-05, "loss": 2.2273, "step": 7562 }, { "epoch": 1.01, "grad_norm": 0.291015625, "learning_rate": 4.5510384033468433e-05, "loss": 2.3061, "step": 7563 }, { "epoch": 1.01, "grad_norm": 0.30859375, "learning_rate": 4.550917373646468e-05, "loss": 2.2761, "step": 7564 }, { "epoch": 1.01, "grad_norm": 0.31640625, "learning_rate": 4.550796329244661e-05, "loss": 2.2858, "step": 7565 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.5506752701422906e-05, "loss": 2.2508, "step": 7566 }, { "epoch": 1.01, "grad_norm": 0.283203125, "learning_rate": 4.5505541963402235e-05, "loss": 2.2697, "step": 7567 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.550433107839329e-05, "loss": 2.2551, "step": 7568 }, { "epoch": 1.01, "grad_norm": 0.30078125, "learning_rate": 4.550312004640474e-05, "loss": 2.2222, "step": 7569 }, { "epoch": 1.01, "grad_norm": 0.294921875, "learning_rate": 4.5501908867445256e-05, "loss": 2.2764, "step": 7570 }, { "epoch": 1.01, "grad_norm": 0.279296875, "learning_rate": 4.550069754152354e-05, "loss": 2.2744, "step": 7571 }, { "epoch": 1.01, "grad_norm": 0.294921875, "learning_rate": 4.549948606864827e-05, "loss": 2.2807, "step": 7572 }, { "epoch": 1.01, "grad_norm": 0.28515625, "learning_rate": 4.5498274448828115e-05, "loss": 2.2525, "step": 7573 }, { "epoch": 1.01, "grad_norm": 0.27734375, "learning_rate": 4.549706268207178e-05, "loss": 2.2755, "step": 7574 }, { "epoch": 1.01, "grad_norm": 0.279296875, "learning_rate": 4.5495850768387946e-05, "loss": 2.2131, "step": 7575 }, { "epoch": 1.01, "grad_norm": 0.3046875, "learning_rate": 4.54946387077853e-05, "loss": 2.2787, "step": 7576 }, { "epoch": 1.01, "grad_norm": 0.279296875, "learning_rate": 4.549342650027252e-05, "loss": 2.2647, "step": 7577 }, { "epoch": 1.01, "grad_norm": 0.3046875, "learning_rate": 4.5492214145858304e-05, "loss": 2.2319, "step": 7578 }, { "epoch": 1.01, "grad_norm": 0.283203125, "learning_rate": 4.5491001644551346e-05, "loss": 2.2807, "step": 7579 }, { "epoch": 1.01, "grad_norm": 0.3046875, "learning_rate": 4.548978899636033e-05, "loss": 2.288, "step": 7580 }, { "epoch": 1.01, "grad_norm": 0.298828125, "learning_rate": 4.5488576201293956e-05, "loss": 2.2433, "step": 7581 }, { "epoch": 1.01, "grad_norm": 0.30078125, "learning_rate": 4.5487363259360905e-05, "loss": 2.2516, "step": 7582 }, { "epoch": 1.01, "grad_norm": 0.298828125, "learning_rate": 4.5486150170569886e-05, "loss": 2.2274, "step": 7583 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.548493693492959e-05, "loss": 2.3019, "step": 7584 }, { "epoch": 1.01, "grad_norm": 0.2734375, "learning_rate": 4.548372355244871e-05, "loss": 2.2382, "step": 7585 }, { "epoch": 1.01, "grad_norm": 0.302734375, "learning_rate": 4.548251002313595e-05, "loss": 2.2566, "step": 7586 }, { "epoch": 1.01, "grad_norm": 0.283203125, "learning_rate": 4.5481296347e-05, "loss": 2.2798, "step": 7587 }, { "epoch": 1.01, "grad_norm": 0.28515625, "learning_rate": 4.548008252404956e-05, "loss": 2.2412, "step": 7588 }, { "epoch": 1.01, "grad_norm": 0.30078125, "learning_rate": 4.547886855429335e-05, "loss": 2.2699, "step": 7589 }, { "epoch": 1.01, "grad_norm": 0.279296875, "learning_rate": 4.547765443774005e-05, "loss": 2.2398, "step": 7590 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.547644017439837e-05, "loss": 2.2913, "step": 7591 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.547522576427702e-05, "loss": 2.2651, "step": 7592 }, { "epoch": 1.01, "grad_norm": 0.27734375, "learning_rate": 4.547401120738469e-05, "loss": 2.2432, "step": 7593 }, { "epoch": 1.01, "grad_norm": 0.306640625, "learning_rate": 4.5472796503730106e-05, "loss": 2.2438, "step": 7594 }, { "epoch": 1.01, "grad_norm": 0.30078125, "learning_rate": 4.547158165332196e-05, "loss": 2.27, "step": 7595 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.547036665616897e-05, "loss": 2.2832, "step": 7596 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.546915151227984e-05, "loss": 2.2279, "step": 7597 }, { "epoch": 1.01, "grad_norm": 0.31640625, "learning_rate": 4.546793622166329e-05, "loss": 2.264, "step": 7598 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.546672078432801e-05, "loss": 2.2741, "step": 7599 }, { "epoch": 1.01, "grad_norm": 0.298828125, "learning_rate": 4.546550520028273e-05, "loss": 2.2511, "step": 7600 }, { "epoch": 1.01, "grad_norm": 0.296875, "learning_rate": 4.546428946953616e-05, "loss": 2.2462, "step": 7601 }, { "epoch": 1.01, "grad_norm": 0.291015625, "learning_rate": 4.546307359209702e-05, "loss": 2.2571, "step": 7602 }, { "epoch": 1.01, "grad_norm": 0.294921875, "learning_rate": 4.546185756797401e-05, "loss": 2.2603, "step": 7603 }, { "epoch": 1.01, "grad_norm": 0.2890625, "learning_rate": 4.5460641397175865e-05, "loss": 2.2268, "step": 7604 }, { "epoch": 1.01, "grad_norm": 0.28125, "learning_rate": 4.5459425079711285e-05, "loss": 2.2427, "step": 7605 }, { "epoch": 1.01, "grad_norm": 0.291015625, "learning_rate": 4.545820861558901e-05, "loss": 2.2469, "step": 7606 }, { "epoch": 1.01, "grad_norm": 0.29296875, "learning_rate": 4.545699200481774e-05, "loss": 2.2729, "step": 7607 }, { "epoch": 1.01, "grad_norm": 0.302734375, "learning_rate": 4.5455775247406206e-05, "loss": 2.2485, "step": 7608 }, { "epoch": 1.01, "grad_norm": 0.287109375, "learning_rate": 4.545455834336313e-05, "loss": 2.2688, "step": 7609 }, { "epoch": 1.02, "grad_norm": 0.291015625, "learning_rate": 4.545334129269723e-05, "loss": 2.2369, "step": 7610 }, { "epoch": 1.02, "grad_norm": 0.28125, "learning_rate": 4.545212409541723e-05, "loss": 2.2791, "step": 7611 }, { "epoch": 1.02, "grad_norm": 0.28515625, "learning_rate": 4.5450906751531865e-05, "loss": 2.2766, "step": 7612 }, { "epoch": 1.02, "grad_norm": 0.29296875, "learning_rate": 4.5449689261049845e-05, "loss": 2.2643, "step": 7613 }, { "epoch": 1.02, "grad_norm": 0.2890625, "learning_rate": 4.544847162397992e-05, "loss": 2.2609, "step": 7614 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.544725384033079e-05, "loss": 2.2383, "step": 7615 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.544603591011121e-05, "loss": 2.2434, "step": 7616 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.544481783332989e-05, "loss": 2.2702, "step": 7617 }, { "epoch": 1.02, "grad_norm": 0.26953125, "learning_rate": 4.544359960999558e-05, "loss": 2.2696, "step": 7618 }, { "epoch": 1.02, "grad_norm": 0.30859375, "learning_rate": 4.5442381240117e-05, "loss": 2.2628, "step": 7619 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.544116272370289e-05, "loss": 2.2483, "step": 7620 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.5439944060761975e-05, "loss": 2.242, "step": 7621 }, { "epoch": 1.02, "grad_norm": 0.29296875, "learning_rate": 4.5438725251303004e-05, "loss": 2.2705, "step": 7622 }, { "epoch": 1.02, "grad_norm": 0.283203125, "learning_rate": 4.54375062953347e-05, "loss": 2.2826, "step": 7623 }, { "epoch": 1.02, "grad_norm": 0.28125, "learning_rate": 4.543628719286581e-05, "loss": 2.2749, "step": 7624 }, { "epoch": 1.02, "grad_norm": 0.298828125, "learning_rate": 4.5435067943905074e-05, "loss": 2.2531, "step": 7625 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.5433848548461236e-05, "loss": 2.248, "step": 7626 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.543262900654301e-05, "loss": 2.2718, "step": 7627 }, { "epoch": 1.02, "grad_norm": 0.302734375, "learning_rate": 4.543140931815917e-05, "loss": 2.2241, "step": 7628 }, { "epoch": 1.02, "grad_norm": 0.310546875, "learning_rate": 4.5430189483318444e-05, "loss": 2.2692, "step": 7629 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.542896950202958e-05, "loss": 2.2721, "step": 7630 }, { "epoch": 1.02, "grad_norm": 0.28125, "learning_rate": 4.542774937430132e-05, "loss": 2.2719, "step": 7631 }, { "epoch": 1.02, "grad_norm": 0.2890625, "learning_rate": 4.542652910014241e-05, "loss": 2.2506, "step": 7632 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.542530867956161e-05, "loss": 2.2455, "step": 7633 }, { "epoch": 1.02, "grad_norm": 0.291015625, "learning_rate": 4.542408811256764e-05, "loss": 2.2529, "step": 7634 }, { "epoch": 1.02, "grad_norm": 0.29296875, "learning_rate": 4.5422867399169275e-05, "loss": 2.2406, "step": 7635 }, { "epoch": 1.02, "grad_norm": 0.296875, "learning_rate": 4.542164653937526e-05, "loss": 2.2452, "step": 7636 }, { "epoch": 1.02, "grad_norm": 0.302734375, "learning_rate": 4.542042553319433e-05, "loss": 2.2385, "step": 7637 }, { "epoch": 1.02, "grad_norm": 0.291015625, "learning_rate": 4.541920438063526e-05, "loss": 2.2749, "step": 7638 }, { "epoch": 1.02, "grad_norm": 0.298828125, "learning_rate": 4.5417983081706795e-05, "loss": 2.237, "step": 7639 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.5416761636417685e-05, "loss": 2.2468, "step": 7640 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.541554004477669e-05, "loss": 2.264, "step": 7641 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.541431830679257e-05, "loss": 2.25, "step": 7642 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.541309642247408e-05, "loss": 2.2702, "step": 7643 }, { "epoch": 1.02, "grad_norm": 0.296875, "learning_rate": 4.541187439182997e-05, "loss": 2.2752, "step": 7644 }, { "epoch": 1.02, "grad_norm": 0.2890625, "learning_rate": 4.541065221486901e-05, "loss": 2.2435, "step": 7645 }, { "epoch": 1.02, "grad_norm": 0.28125, "learning_rate": 4.540942989159995e-05, "loss": 2.2724, "step": 7646 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.5408207422031566e-05, "loss": 2.2773, "step": 7647 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.540698480617262e-05, "loss": 2.2676, "step": 7648 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.5405762044031864e-05, "loss": 2.279, "step": 7649 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.540453913561807e-05, "loss": 2.2605, "step": 7650 }, { "epoch": 1.02, "grad_norm": 0.27734375, "learning_rate": 4.5403316080940005e-05, "loss": 2.2567, "step": 7651 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.540209288000643e-05, "loss": 2.2463, "step": 7652 }, { "epoch": 1.02, "grad_norm": 0.296875, "learning_rate": 4.5400869532826126e-05, "loss": 2.2929, "step": 7653 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.539964603940785e-05, "loss": 2.2397, "step": 7654 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.539842239976037e-05, "loss": 2.2461, "step": 7655 }, { "epoch": 1.02, "grad_norm": 0.279296875, "learning_rate": 4.5397198613892476e-05, "loss": 2.254, "step": 7656 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.539597468181292e-05, "loss": 2.2392, "step": 7657 }, { "epoch": 1.02, "grad_norm": 0.296875, "learning_rate": 4.5394750603530495e-05, "loss": 2.2451, "step": 7658 }, { "epoch": 1.02, "grad_norm": 0.314453125, "learning_rate": 4.5393526379053955e-05, "loss": 2.2386, "step": 7659 }, { "epoch": 1.02, "grad_norm": 0.28125, "learning_rate": 4.5392302008392085e-05, "loss": 2.2735, "step": 7660 }, { "epoch": 1.02, "grad_norm": 0.302734375, "learning_rate": 4.539107749155366e-05, "loss": 2.2972, "step": 7661 }, { "epoch": 1.02, "grad_norm": 0.28515625, "learning_rate": 4.538985282854746e-05, "loss": 2.2579, "step": 7662 }, { "epoch": 1.02, "grad_norm": 0.279296875, "learning_rate": 4.5388628019382266e-05, "loss": 2.2433, "step": 7663 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.5387403064066855e-05, "loss": 2.2633, "step": 7664 }, { "epoch": 1.02, "grad_norm": 0.283203125, "learning_rate": 4.5386177962609996e-05, "loss": 2.2472, "step": 7665 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.538495271502049e-05, "loss": 2.2524, "step": 7666 }, { "epoch": 1.02, "grad_norm": 0.3046875, "learning_rate": 4.538372732130711e-05, "loss": 2.2282, "step": 7667 }, { "epoch": 1.02, "grad_norm": 0.2890625, "learning_rate": 4.5382501781478646e-05, "loss": 2.2661, "step": 7668 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.538127609554387e-05, "loss": 2.2467, "step": 7669 }, { "epoch": 1.02, "grad_norm": 0.296875, "learning_rate": 4.538005026351158e-05, "loss": 2.2839, "step": 7670 }, { "epoch": 1.02, "grad_norm": 0.3125, "learning_rate": 4.5378824285390566e-05, "loss": 2.261, "step": 7671 }, { "epoch": 1.02, "grad_norm": 0.28515625, "learning_rate": 4.53775981611896e-05, "loss": 2.2481, "step": 7672 }, { "epoch": 1.02, "grad_norm": 0.291015625, "learning_rate": 4.537637189091748e-05, "loss": 2.2451, "step": 7673 }, { "epoch": 1.02, "grad_norm": 0.283203125, "learning_rate": 4.5375145474583e-05, "loss": 2.309, "step": 7674 }, { "epoch": 1.02, "grad_norm": 0.29296875, "learning_rate": 4.537391891219496e-05, "loss": 2.2858, "step": 7675 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.537269220376212e-05, "loss": 2.277, "step": 7676 }, { "epoch": 1.02, "grad_norm": 0.30078125, "learning_rate": 4.537146534929331e-05, "loss": 2.2795, "step": 7677 }, { "epoch": 1.02, "grad_norm": 0.26953125, "learning_rate": 4.53702383487973e-05, "loss": 2.2812, "step": 7678 }, { "epoch": 1.02, "grad_norm": 0.287109375, "learning_rate": 4.5369011202282895e-05, "loss": 2.2445, "step": 7679 }, { "epoch": 1.02, "grad_norm": 0.291015625, "learning_rate": 4.5367783909758884e-05, "loss": 2.274, "step": 7680 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.536655647123408e-05, "loss": 2.2672, "step": 7681 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.536532888671726e-05, "loss": 2.2774, "step": 7682 }, { "epoch": 1.02, "grad_norm": 0.294921875, "learning_rate": 4.536410115621725e-05, "loss": 2.2496, "step": 7683 }, { "epoch": 1.02, "grad_norm": 0.2890625, "learning_rate": 4.536287327974282e-05, "loss": 2.2502, "step": 7684 }, { "epoch": 1.03, "grad_norm": 0.283203125, "learning_rate": 4.5361645257302805e-05, "loss": 2.2789, "step": 7685 }, { "epoch": 1.03, "grad_norm": 0.28515625, "learning_rate": 4.536041708890598e-05, "loss": 2.2594, "step": 7686 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.535918877456116e-05, "loss": 2.2459, "step": 7687 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.5357960314277154e-05, "loss": 2.2293, "step": 7688 }, { "epoch": 1.03, "grad_norm": 0.306640625, "learning_rate": 4.535673170806276e-05, "loss": 2.262, "step": 7689 }, { "epoch": 1.03, "grad_norm": 0.287109375, "learning_rate": 4.5355502955926787e-05, "loss": 2.2736, "step": 7690 }, { "epoch": 1.03, "grad_norm": 0.296875, "learning_rate": 4.535427405787804e-05, "loss": 2.2786, "step": 7691 }, { "epoch": 1.03, "grad_norm": 0.279296875, "learning_rate": 4.5353045013925344e-05, "loss": 2.2186, "step": 7692 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.535181582407749e-05, "loss": 2.2675, "step": 7693 }, { "epoch": 1.03, "grad_norm": 0.271484375, "learning_rate": 4.5350586488343294e-05, "loss": 2.2498, "step": 7694 }, { "epoch": 1.03, "grad_norm": 0.283203125, "learning_rate": 4.534935700673157e-05, "loss": 2.2783, "step": 7695 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.5348127379251136e-05, "loss": 2.2877, "step": 7696 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.53468976059108e-05, "loss": 2.2689, "step": 7697 }, { "epoch": 1.03, "grad_norm": 0.302734375, "learning_rate": 4.534566768671937e-05, "loss": 2.2908, "step": 7698 }, { "epoch": 1.03, "grad_norm": 0.287109375, "learning_rate": 4.534443762168569e-05, "loss": 2.284, "step": 7699 }, { "epoch": 1.03, "grad_norm": 0.29296875, "learning_rate": 4.5343207410818544e-05, "loss": 2.2589, "step": 7700 }, { "epoch": 1.03, "grad_norm": 0.28125, "learning_rate": 4.534197705412677e-05, "loss": 2.2718, "step": 7701 }, { "epoch": 1.03, "grad_norm": 0.296875, "learning_rate": 4.5340746551619186e-05, "loss": 2.261, "step": 7702 }, { "epoch": 1.03, "grad_norm": 0.302734375, "learning_rate": 4.53395159033046e-05, "loss": 2.2887, "step": 7703 }, { "epoch": 1.03, "grad_norm": 0.279296875, "learning_rate": 4.533828510919185e-05, "loss": 2.2338, "step": 7704 }, { "epoch": 1.03, "grad_norm": 0.3125, "learning_rate": 4.533705416928975e-05, "loss": 2.2423, "step": 7705 }, { "epoch": 1.03, "grad_norm": 0.296875, "learning_rate": 4.533582308360712e-05, "loss": 2.2571, "step": 7706 }, { "epoch": 1.03, "grad_norm": 0.28125, "learning_rate": 4.5334591852152794e-05, "loss": 2.2188, "step": 7707 }, { "epoch": 1.03, "grad_norm": 0.279296875, "learning_rate": 4.5333360474935594e-05, "loss": 2.2935, "step": 7708 }, { "epoch": 1.03, "grad_norm": 0.30078125, "learning_rate": 4.533212895196435e-05, "loss": 2.2521, "step": 7709 }, { "epoch": 1.03, "grad_norm": 0.28515625, "learning_rate": 4.533089728324788e-05, "loss": 2.2435, "step": 7710 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.5329665468795025e-05, "loss": 2.255, "step": 7711 }, { "epoch": 1.03, "grad_norm": 0.275390625, "learning_rate": 4.5328433508614596e-05, "loss": 2.2326, "step": 7712 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.532720140271545e-05, "loss": 2.2756, "step": 7713 }, { "epoch": 1.03, "grad_norm": 0.310546875, "learning_rate": 4.5325969151106396e-05, "loss": 2.2428, "step": 7714 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.5324736753796285e-05, "loss": 2.2715, "step": 7715 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.5323504210793946e-05, "loss": 2.2721, "step": 7716 }, { "epoch": 1.03, "grad_norm": 0.29296875, "learning_rate": 4.5322271522108194e-05, "loss": 2.2693, "step": 7717 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.53210386877479e-05, "loss": 2.2712, "step": 7718 }, { "epoch": 1.03, "grad_norm": 0.28515625, "learning_rate": 4.531980570772187e-05, "loss": 2.2581, "step": 7719 }, { "epoch": 1.03, "grad_norm": 0.287109375, "learning_rate": 4.531857258203897e-05, "loss": 2.2502, "step": 7720 }, { "epoch": 1.03, "grad_norm": 0.287109375, "learning_rate": 4.531733931070801e-05, "loss": 2.2459, "step": 7721 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.5316105893737854e-05, "loss": 2.271, "step": 7722 }, { "epoch": 1.03, "grad_norm": 0.302734375, "learning_rate": 4.531487233113734e-05, "loss": 2.2574, "step": 7723 }, { "epoch": 1.03, "grad_norm": 0.294921875, "learning_rate": 4.53136386229153e-05, "loss": 2.2323, "step": 7724 }, { "epoch": 1.03, "grad_norm": 0.29296875, "learning_rate": 4.5312404769080574e-05, "loss": 2.2885, "step": 7725 }, { "epoch": 1.03, "grad_norm": 0.30078125, "learning_rate": 4.5311170769642026e-05, "loss": 2.2769, "step": 7726 }, { "epoch": 1.03, "grad_norm": 0.294921875, "learning_rate": 4.530993662460848e-05, "loss": 2.2574, "step": 7727 }, { "epoch": 1.03, "grad_norm": 0.27734375, "learning_rate": 4.5308702333988805e-05, "loss": 2.2632, "step": 7728 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.530746789779183e-05, "loss": 2.2446, "step": 7729 }, { "epoch": 1.03, "grad_norm": 0.28515625, "learning_rate": 4.530623331602642e-05, "loss": 2.2555, "step": 7730 }, { "epoch": 1.03, "grad_norm": 0.310546875, "learning_rate": 4.5304998588701405e-05, "loss": 2.2557, "step": 7731 }, { "epoch": 1.03, "grad_norm": 0.28515625, "learning_rate": 4.530376371582566e-05, "loss": 2.2435, "step": 7732 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.530252869740801e-05, "loss": 2.2537, "step": 7733 }, { "epoch": 1.03, "grad_norm": 0.29296875, "learning_rate": 4.5301293533457325e-05, "loss": 2.2355, "step": 7734 }, { "epoch": 1.03, "grad_norm": 0.287109375, "learning_rate": 4.530005822398246e-05, "loss": 2.2627, "step": 7735 }, { "epoch": 1.03, "grad_norm": 0.310546875, "learning_rate": 4.529882276899226e-05, "loss": 2.2691, "step": 7736 }, { "epoch": 1.03, "grad_norm": 0.30078125, "learning_rate": 4.529758716849559e-05, "loss": 2.2502, "step": 7737 }, { "epoch": 1.03, "grad_norm": 0.3125, "learning_rate": 4.52963514225013e-05, "loss": 2.2738, "step": 7738 }, { "epoch": 1.03, "grad_norm": 0.314453125, "learning_rate": 4.529511553101825e-05, "loss": 2.2515, "step": 7739 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.52938794940553e-05, "loss": 2.2253, "step": 7740 }, { "epoch": 1.03, "grad_norm": 0.28125, "learning_rate": 4.529264331162132e-05, "loss": 2.2447, "step": 7741 }, { "epoch": 1.03, "grad_norm": 0.296875, "learning_rate": 4.5291406983725154e-05, "loss": 2.2571, "step": 7742 }, { "epoch": 1.03, "grad_norm": 0.29296875, "learning_rate": 4.529017051037567e-05, "loss": 2.2843, "step": 7743 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.528893389158174e-05, "loss": 2.2723, "step": 7744 }, { "epoch": 1.03, "grad_norm": 0.2890625, "learning_rate": 4.528769712735222e-05, "loss": 2.2641, "step": 7745 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.528646021769597e-05, "loss": 2.2561, "step": 7746 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.528522316262187e-05, "loss": 2.2461, "step": 7747 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.528398596213878e-05, "loss": 2.2601, "step": 7748 }, { "epoch": 1.03, "grad_norm": 0.279296875, "learning_rate": 4.528274861625557e-05, "loss": 2.2663, "step": 7749 }, { "epoch": 1.03, "grad_norm": 0.291015625, "learning_rate": 4.5281511124981114e-05, "loss": 2.3077, "step": 7750 }, { "epoch": 1.03, "grad_norm": 0.294921875, "learning_rate": 4.528027348832427e-05, "loss": 2.2804, "step": 7751 }, { "epoch": 1.03, "grad_norm": 0.294921875, "learning_rate": 4.5279035706293924e-05, "loss": 2.2794, "step": 7752 }, { "epoch": 1.03, "grad_norm": 0.302734375, "learning_rate": 4.527779777889893e-05, "loss": 2.2585, "step": 7753 }, { "epoch": 1.03, "grad_norm": 0.296875, "learning_rate": 4.5276559706148194e-05, "loss": 2.256, "step": 7754 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.527532148805056e-05, "loss": 2.2751, "step": 7755 }, { "epoch": 1.03, "grad_norm": 0.2734375, "learning_rate": 4.5274083124614905e-05, "loss": 2.2663, "step": 7756 }, { "epoch": 1.03, "grad_norm": 0.28125, "learning_rate": 4.527284461585013e-05, "loss": 2.2418, "step": 7757 }, { "epoch": 1.03, "grad_norm": 0.298828125, "learning_rate": 4.5271605961765094e-05, "loss": 2.2392, "step": 7758 }, { "epoch": 1.03, "grad_norm": 0.28125, "learning_rate": 4.5270367162368677e-05, "loss": 2.2572, "step": 7759 }, { "epoch": 1.04, "grad_norm": 0.30859375, "learning_rate": 4.526912821766977e-05, "loss": 2.2431, "step": 7760 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.526788912767723e-05, "loss": 2.2204, "step": 7761 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.526664989239997e-05, "loss": 2.2481, "step": 7762 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.526541051184685e-05, "loss": 2.2852, "step": 7763 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.526417098602677e-05, "loss": 2.2708, "step": 7764 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.526293131494861e-05, "loss": 2.2253, "step": 7765 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.526169149862125e-05, "loss": 2.2543, "step": 7766 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.5260451537053574e-05, "loss": 2.2482, "step": 7767 }, { "epoch": 1.04, "grad_norm": 0.283203125, "learning_rate": 4.525921143025449e-05, "loss": 2.2709, "step": 7768 }, { "epoch": 1.04, "grad_norm": 0.26953125, "learning_rate": 4.525797117823286e-05, "loss": 2.2502, "step": 7769 }, { "epoch": 1.04, "grad_norm": 0.302734375, "learning_rate": 4.52567307809976e-05, "loss": 2.2582, "step": 7770 }, { "epoch": 1.04, "grad_norm": 0.2890625, "learning_rate": 4.5255490238557593e-05, "loss": 2.2702, "step": 7771 }, { "epoch": 1.04, "grad_norm": 0.302734375, "learning_rate": 4.525424955092173e-05, "loss": 2.2627, "step": 7772 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.52530087180989e-05, "loss": 2.2982, "step": 7773 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.5251767740097995e-05, "loss": 2.2505, "step": 7774 }, { "epoch": 1.04, "grad_norm": 0.3046875, "learning_rate": 4.5250526616927924e-05, "loss": 2.2444, "step": 7775 }, { "epoch": 1.04, "grad_norm": 0.3125, "learning_rate": 4.524928534859757e-05, "loss": 2.2519, "step": 7776 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.5248043935115845e-05, "loss": 2.2638, "step": 7777 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.524680237649164e-05, "loss": 2.2786, "step": 7778 }, { "epoch": 1.04, "grad_norm": 0.302734375, "learning_rate": 4.524556067273385e-05, "loss": 2.2489, "step": 7779 }, { "epoch": 1.04, "grad_norm": 0.296875, "learning_rate": 4.524431882385138e-05, "loss": 2.2456, "step": 7780 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.5243076829853135e-05, "loss": 2.2624, "step": 7781 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.524183469074802e-05, "loss": 2.3042, "step": 7782 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.5240592406544935e-05, "loss": 2.237, "step": 7783 }, { "epoch": 1.04, "grad_norm": 0.275390625, "learning_rate": 4.523934997725278e-05, "loss": 2.2679, "step": 7784 }, { "epoch": 1.04, "grad_norm": 0.27734375, "learning_rate": 4.5238107402880456e-05, "loss": 2.2753, "step": 7785 }, { "epoch": 1.04, "grad_norm": 0.28125, "learning_rate": 4.52368646834369e-05, "loss": 2.2677, "step": 7786 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.5235621818930984e-05, "loss": 2.281, "step": 7787 }, { "epoch": 1.04, "grad_norm": 0.28125, "learning_rate": 4.5234378809371635e-05, "loss": 2.2481, "step": 7788 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.5233135654767766e-05, "loss": 2.2334, "step": 7789 }, { "epoch": 1.04, "grad_norm": 0.2890625, "learning_rate": 4.5231892355128275e-05, "loss": 2.2432, "step": 7790 }, { "epoch": 1.04, "grad_norm": 0.294921875, "learning_rate": 4.5230648910462086e-05, "loss": 2.2542, "step": 7791 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.522940532077811e-05, "loss": 2.2262, "step": 7792 }, { "epoch": 1.04, "grad_norm": 0.431640625, "learning_rate": 4.522816158608526e-05, "loss": 2.2812, "step": 7793 }, { "epoch": 1.04, "grad_norm": 0.310546875, "learning_rate": 4.522691770639245e-05, "loss": 2.2389, "step": 7794 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.52256736817086e-05, "loss": 2.2933, "step": 7795 }, { "epoch": 1.04, "grad_norm": 0.294921875, "learning_rate": 4.5224429512042624e-05, "loss": 2.2763, "step": 7796 }, { "epoch": 1.04, "grad_norm": 0.283203125, "learning_rate": 4.522318519740344e-05, "loss": 2.2414, "step": 7797 }, { "epoch": 1.04, "grad_norm": 0.302734375, "learning_rate": 4.522194073779997e-05, "loss": 2.2163, "step": 7798 }, { "epoch": 1.04, "grad_norm": 0.294921875, "learning_rate": 4.522069613324113e-05, "loss": 2.274, "step": 7799 }, { "epoch": 1.04, "grad_norm": 0.28125, "learning_rate": 4.521945138373585e-05, "loss": 2.2471, "step": 7800 }, { "epoch": 1.04, "grad_norm": 0.283203125, "learning_rate": 4.5218206489293044e-05, "loss": 2.2543, "step": 7801 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.5216961449921646e-05, "loss": 2.245, "step": 7802 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.521571626563057e-05, "loss": 2.3157, "step": 7803 }, { "epoch": 1.04, "grad_norm": 0.302734375, "learning_rate": 4.521447093642874e-05, "loss": 2.259, "step": 7804 }, { "epoch": 1.04, "grad_norm": 0.330078125, "learning_rate": 4.5213225462325095e-05, "loss": 2.2217, "step": 7805 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.5211979843328556e-05, "loss": 2.2521, "step": 7806 }, { "epoch": 1.04, "grad_norm": 0.28125, "learning_rate": 4.5210734079448055e-05, "loss": 2.27, "step": 7807 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.5209488170692516e-05, "loss": 2.2402, "step": 7808 }, { "epoch": 1.04, "grad_norm": 0.2890625, "learning_rate": 4.520824211707088e-05, "loss": 2.2805, "step": 7809 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.520699591859207e-05, "loss": 2.2319, "step": 7810 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.520574957526501e-05, "loss": 2.2594, "step": 7811 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.520450308709866e-05, "loss": 2.2784, "step": 7812 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.520325645410193e-05, "loss": 2.2338, "step": 7813 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.520200967628377e-05, "loss": 2.2442, "step": 7814 }, { "epoch": 1.04, "grad_norm": 0.28515625, "learning_rate": 4.520076275365312e-05, "loss": 2.2575, "step": 7815 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.5199515686218905e-05, "loss": 2.2538, "step": 7816 }, { "epoch": 1.04, "grad_norm": 0.291015625, "learning_rate": 4.519826847399008e-05, "loss": 2.2619, "step": 7817 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.519702111697557e-05, "loss": 2.2671, "step": 7818 }, { "epoch": 1.04, "grad_norm": 0.294921875, "learning_rate": 4.519577361518432e-05, "loss": 2.2788, "step": 7819 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.5194525968625276e-05, "loss": 2.2719, "step": 7820 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.519327817730739e-05, "loss": 2.2572, "step": 7821 }, { "epoch": 1.04, "grad_norm": 0.296875, "learning_rate": 4.5192030241239583e-05, "loss": 2.2625, "step": 7822 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.5190782160430824e-05, "loss": 2.2635, "step": 7823 }, { "epoch": 1.04, "grad_norm": 0.365234375, "learning_rate": 4.518953393489005e-05, "loss": 2.274, "step": 7824 }, { "epoch": 1.04, "grad_norm": 0.283203125, "learning_rate": 4.518828556462621e-05, "loss": 2.2596, "step": 7825 }, { "epoch": 1.04, "grad_norm": 0.298828125, "learning_rate": 4.5187037049648254e-05, "loss": 2.2347, "step": 7826 }, { "epoch": 1.04, "grad_norm": 0.29296875, "learning_rate": 4.518578838996512e-05, "loss": 2.2586, "step": 7827 }, { "epoch": 1.04, "grad_norm": 0.287109375, "learning_rate": 4.518453958558577e-05, "loss": 2.2545, "step": 7828 }, { "epoch": 1.04, "grad_norm": 0.294921875, "learning_rate": 4.5183290636519155e-05, "loss": 2.2887, "step": 7829 }, { "epoch": 1.04, "grad_norm": 0.2890625, "learning_rate": 4.518204154277422e-05, "loss": 2.2613, "step": 7830 }, { "epoch": 1.04, "grad_norm": 0.296875, "learning_rate": 4.5180792304359935e-05, "loss": 2.2254, "step": 7831 }, { "epoch": 1.04, "grad_norm": 0.3125, "learning_rate": 4.517954292128524e-05, "loss": 2.2287, "step": 7832 }, { "epoch": 1.04, "grad_norm": 0.2890625, "learning_rate": 4.51782933935591e-05, "loss": 2.2244, "step": 7833 }, { "epoch": 1.04, "grad_norm": 0.30078125, "learning_rate": 4.517704372119046e-05, "loss": 2.2589, "step": 7834 }, { "epoch": 1.05, "grad_norm": 0.287109375, "learning_rate": 4.517579390418829e-05, "loss": 2.2676, "step": 7835 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.5174543942561543e-05, "loss": 2.2617, "step": 7836 }, { "epoch": 1.05, "grad_norm": 0.298828125, "learning_rate": 4.5173293836319184e-05, "loss": 2.254, "step": 7837 }, { "epoch": 1.05, "grad_norm": 0.27734375, "learning_rate": 4.517204358547017e-05, "loss": 2.2816, "step": 7838 }, { "epoch": 1.05, "grad_norm": 0.310546875, "learning_rate": 4.517079319002346e-05, "loss": 2.244, "step": 7839 }, { "epoch": 1.05, "grad_norm": 0.296875, "learning_rate": 4.5169542649988015e-05, "loss": 2.2237, "step": 7840 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.516829196537282e-05, "loss": 2.2926, "step": 7841 }, { "epoch": 1.05, "grad_norm": 0.2734375, "learning_rate": 4.516704113618681e-05, "loss": 2.2613, "step": 7842 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.5165790162438984e-05, "loss": 2.2251, "step": 7843 }, { "epoch": 1.05, "grad_norm": 0.306640625, "learning_rate": 4.5164539044138275e-05, "loss": 2.24, "step": 7844 }, { "epoch": 1.05, "grad_norm": 0.291015625, "learning_rate": 4.516328778129367e-05, "loss": 2.2657, "step": 7845 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.516203637391414e-05, "loss": 2.277, "step": 7846 }, { "epoch": 1.05, "grad_norm": 0.302734375, "learning_rate": 4.516078482200865e-05, "loss": 2.2753, "step": 7847 }, { "epoch": 1.05, "grad_norm": 0.28125, "learning_rate": 4.515953312558617e-05, "loss": 2.2274, "step": 7848 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.515828128465568e-05, "loss": 2.2581, "step": 7849 }, { "epoch": 1.05, "grad_norm": 0.271484375, "learning_rate": 4.515702929922615e-05, "loss": 2.2957, "step": 7850 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.515577716930655e-05, "loss": 2.2408, "step": 7851 }, { "epoch": 1.05, "grad_norm": 0.3125, "learning_rate": 4.5154524894905855e-05, "loss": 2.2542, "step": 7852 }, { "epoch": 1.05, "grad_norm": 0.287109375, "learning_rate": 4.515327247603305e-05, "loss": 2.2662, "step": 7853 }, { "epoch": 1.05, "grad_norm": 0.322265625, "learning_rate": 4.5152019912697106e-05, "loss": 2.2474, "step": 7854 }, { "epoch": 1.05, "grad_norm": 0.287109375, "learning_rate": 4.5150767204907005e-05, "loss": 2.2421, "step": 7855 }, { "epoch": 1.05, "grad_norm": 0.3046875, "learning_rate": 4.5149514352671726e-05, "loss": 2.2753, "step": 7856 }, { "epoch": 1.05, "grad_norm": 0.298828125, "learning_rate": 4.514826135600025e-05, "loss": 2.2634, "step": 7857 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.514700821490155e-05, "loss": 2.2885, "step": 7858 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.514575492938463e-05, "loss": 2.2372, "step": 7859 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.514450149945845e-05, "loss": 2.2876, "step": 7860 }, { "epoch": 1.05, "grad_norm": 0.291015625, "learning_rate": 4.5143247925132014e-05, "loss": 2.2884, "step": 7861 }, { "epoch": 1.05, "grad_norm": 0.30078125, "learning_rate": 4.514199420641429e-05, "loss": 2.2795, "step": 7862 }, { "epoch": 1.05, "grad_norm": 0.279296875, "learning_rate": 4.514074034331428e-05, "loss": 2.2803, "step": 7863 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.5139486335840966e-05, "loss": 2.2612, "step": 7864 }, { "epoch": 1.05, "grad_norm": 0.306640625, "learning_rate": 4.5138232184003334e-05, "loss": 2.2503, "step": 7865 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.513697788781037e-05, "loss": 2.2635, "step": 7866 }, { "epoch": 1.05, "grad_norm": 0.30078125, "learning_rate": 4.5135723447271084e-05, "loss": 2.2802, "step": 7867 }, { "epoch": 1.05, "grad_norm": 0.28515625, "learning_rate": 4.513446886239445e-05, "loss": 2.276, "step": 7868 }, { "epoch": 1.05, "grad_norm": 0.28515625, "learning_rate": 4.5133214133189475e-05, "loss": 2.266, "step": 7869 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.513195925966514e-05, "loss": 2.2876, "step": 7870 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.513070424183044e-05, "loss": 2.246, "step": 7871 }, { "epoch": 1.05, "grad_norm": 0.30078125, "learning_rate": 4.512944907969439e-05, "loss": 2.266, "step": 7872 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.512819377326597e-05, "loss": 2.2648, "step": 7873 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.512693832255418e-05, "loss": 2.2628, "step": 7874 }, { "epoch": 1.05, "grad_norm": 0.3125, "learning_rate": 4.512568272756803e-05, "loss": 2.2593, "step": 7875 }, { "epoch": 1.05, "grad_norm": 0.28515625, "learning_rate": 4.51244269883165e-05, "loss": 2.2336, "step": 7876 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.512317110480861e-05, "loss": 2.2677, "step": 7877 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.5121915077053354e-05, "loss": 2.2553, "step": 7878 }, { "epoch": 1.05, "grad_norm": 0.291015625, "learning_rate": 4.512065890505974e-05, "loss": 2.2615, "step": 7879 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.5119402588836765e-05, "loss": 2.2908, "step": 7880 }, { "epoch": 1.05, "grad_norm": 0.31640625, "learning_rate": 4.511814612839345e-05, "loss": 2.2671, "step": 7881 }, { "epoch": 1.05, "grad_norm": 0.279296875, "learning_rate": 4.511688952373878e-05, "loss": 2.2649, "step": 7882 }, { "epoch": 1.05, "grad_norm": 0.28125, "learning_rate": 4.5115632774881786e-05, "loss": 2.2509, "step": 7883 }, { "epoch": 1.05, "grad_norm": 0.302734375, "learning_rate": 4.5114375881831454e-05, "loss": 2.2298, "step": 7884 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.5113118844596815e-05, "loss": 2.2894, "step": 7885 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.511186166318686e-05, "loss": 2.2573, "step": 7886 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.5110604337610615e-05, "loss": 2.2743, "step": 7887 }, { "epoch": 1.05, "grad_norm": 0.291015625, "learning_rate": 4.510934686787709e-05, "loss": 2.2693, "step": 7888 }, { "epoch": 1.05, "grad_norm": 0.27734375, "learning_rate": 4.510808925399529e-05, "loss": 2.2635, "step": 7889 }, { "epoch": 1.05, "grad_norm": 0.28125, "learning_rate": 4.510683149597423e-05, "loss": 2.2533, "step": 7890 }, { "epoch": 1.05, "grad_norm": 0.283203125, "learning_rate": 4.510557359382295e-05, "loss": 2.2585, "step": 7891 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.510431554755044e-05, "loss": 2.2543, "step": 7892 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.510305735716572e-05, "loss": 2.2805, "step": 7893 }, { "epoch": 1.05, "grad_norm": 0.29296875, "learning_rate": 4.510179902267783e-05, "loss": 2.2249, "step": 7894 }, { "epoch": 1.05, "grad_norm": 0.287109375, "learning_rate": 4.510054054409577e-05, "loss": 2.2632, "step": 7895 }, { "epoch": 1.05, "grad_norm": 0.279296875, "learning_rate": 4.509928192142857e-05, "loss": 2.2474, "step": 7896 }, { "epoch": 1.05, "grad_norm": 0.296875, "learning_rate": 4.509802315468524e-05, "loss": 2.2778, "step": 7897 }, { "epoch": 1.05, "grad_norm": 0.3046875, "learning_rate": 4.5096764243874826e-05, "loss": 2.2494, "step": 7898 }, { "epoch": 1.05, "grad_norm": 0.28515625, "learning_rate": 4.5095505189006325e-05, "loss": 2.2595, "step": 7899 }, { "epoch": 1.05, "grad_norm": 0.3046875, "learning_rate": 4.509424599008878e-05, "loss": 2.2579, "step": 7900 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.509298664713122e-05, "loss": 2.2576, "step": 7901 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.509172716014265e-05, "loss": 2.2705, "step": 7902 }, { "epoch": 1.05, "grad_norm": 0.279296875, "learning_rate": 4.509046752913213e-05, "loss": 2.2443, "step": 7903 }, { "epoch": 1.05, "grad_norm": 0.28515625, "learning_rate": 4.5089207754108663e-05, "loss": 2.243, "step": 7904 }, { "epoch": 1.05, "grad_norm": 0.287109375, "learning_rate": 4.508794783508129e-05, "loss": 2.2532, "step": 7905 }, { "epoch": 1.05, "grad_norm": 0.2890625, "learning_rate": 4.5086687772059047e-05, "loss": 2.2748, "step": 7906 }, { "epoch": 1.05, "grad_norm": 0.306640625, "learning_rate": 4.5085427565050964e-05, "loss": 2.2199, "step": 7907 }, { "epoch": 1.05, "grad_norm": 0.294921875, "learning_rate": 4.5084167214066064e-05, "loss": 2.2596, "step": 7908 }, { "epoch": 1.05, "grad_norm": 0.28125, "learning_rate": 4.5082906719113386e-05, "loss": 2.2552, "step": 7909 }, { "epoch": 1.06, "grad_norm": 0.279296875, "learning_rate": 4.508164608020198e-05, "loss": 2.2595, "step": 7910 }, { "epoch": 1.06, "grad_norm": 0.3046875, "learning_rate": 4.5080385297340855e-05, "loss": 2.27, "step": 7911 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.5079124370539075e-05, "loss": 2.268, "step": 7912 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.507786329980567e-05, "loss": 2.2398, "step": 7913 }, { "epoch": 1.06, "grad_norm": 0.28515625, "learning_rate": 4.507660208514967e-05, "loss": 2.2718, "step": 7914 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.507534072658013e-05, "loss": 2.2645, "step": 7915 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.507407922410608e-05, "loss": 2.2406, "step": 7916 }, { "epoch": 1.06, "grad_norm": 0.28125, "learning_rate": 4.507281757773657e-05, "loss": 2.2612, "step": 7917 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.507155578748064e-05, "loss": 2.2641, "step": 7918 }, { "epoch": 1.06, "grad_norm": 0.283203125, "learning_rate": 4.507029385334734e-05, "loss": 2.262, "step": 7919 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.5069031775345706e-05, "loss": 2.2456, "step": 7920 }, { "epoch": 1.06, "grad_norm": 0.283203125, "learning_rate": 4.506776955348479e-05, "loss": 2.2353, "step": 7921 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.506650718777366e-05, "loss": 2.2718, "step": 7922 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.5065244678221324e-05, "loss": 2.2721, "step": 7923 }, { "epoch": 1.06, "grad_norm": 0.30078125, "learning_rate": 4.506398202483686e-05, "loss": 2.2386, "step": 7924 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.506271922762932e-05, "loss": 2.2471, "step": 7925 }, { "epoch": 1.06, "grad_norm": 0.28515625, "learning_rate": 4.506145628660774e-05, "loss": 2.2584, "step": 7926 }, { "epoch": 1.06, "grad_norm": 0.294921875, "learning_rate": 4.5060193201781185e-05, "loss": 2.2727, "step": 7927 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.505892997315871e-05, "loss": 2.2855, "step": 7928 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.505766660074936e-05, "loss": 2.2855, "step": 7929 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.5056403084562205e-05, "loss": 2.272, "step": 7930 }, { "epoch": 1.06, "grad_norm": 0.28515625, "learning_rate": 4.505513942460629e-05, "loss": 2.2474, "step": 7931 }, { "epoch": 1.06, "grad_norm": 0.29296875, "learning_rate": 4.505387562089067e-05, "loss": 2.253, "step": 7932 }, { "epoch": 1.06, "grad_norm": 0.310546875, "learning_rate": 4.505261167342442e-05, "loss": 2.2551, "step": 7933 }, { "epoch": 1.06, "grad_norm": 0.314453125, "learning_rate": 4.505134758221658e-05, "loss": 2.2764, "step": 7934 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.505008334727624e-05, "loss": 2.2524, "step": 7935 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.5048818968612435e-05, "loss": 2.2821, "step": 7936 }, { "epoch": 1.06, "grad_norm": 0.28125, "learning_rate": 4.504755444623425e-05, "loss": 2.2099, "step": 7937 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.504628978015073e-05, "loss": 2.2517, "step": 7938 }, { "epoch": 1.06, "grad_norm": 0.314453125, "learning_rate": 4.5045024970370945e-05, "loss": 2.2373, "step": 7939 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.5043760016903973e-05, "loss": 2.289, "step": 7940 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.504249491975887e-05, "loss": 2.245, "step": 7941 }, { "epoch": 1.06, "grad_norm": 0.28515625, "learning_rate": 4.5041229678944705e-05, "loss": 2.2398, "step": 7942 }, { "epoch": 1.06, "grad_norm": 0.310546875, "learning_rate": 4.503996429447056e-05, "loss": 2.2656, "step": 7943 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.503869876634549e-05, "loss": 2.2588, "step": 7944 }, { "epoch": 1.06, "grad_norm": 0.28125, "learning_rate": 4.5037433094578574e-05, "loss": 2.2393, "step": 7945 }, { "epoch": 1.06, "grad_norm": 0.294921875, "learning_rate": 4.503616727917888e-05, "loss": 2.2699, "step": 7946 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.503490132015549e-05, "loss": 2.26, "step": 7947 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.5033635217517476e-05, "loss": 2.2793, "step": 7948 }, { "epoch": 1.06, "grad_norm": 0.462890625, "learning_rate": 4.50323689712739e-05, "loss": 2.2557, "step": 7949 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.5031102581433856e-05, "loss": 2.2835, "step": 7950 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.5029836048006425e-05, "loss": 2.2576, "step": 7951 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.502856937100066e-05, "loss": 2.2565, "step": 7952 }, { "epoch": 1.06, "grad_norm": 0.29296875, "learning_rate": 4.502730255042567e-05, "loss": 2.2333, "step": 7953 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.502603558629052e-05, "loss": 2.2683, "step": 7954 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.5024768478604293e-05, "loss": 2.2848, "step": 7955 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.502350122737608e-05, "loss": 2.2958, "step": 7956 }, { "epoch": 1.06, "grad_norm": 0.314453125, "learning_rate": 4.502223383261496e-05, "loss": 2.2581, "step": 7957 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.5020966294330005e-05, "loss": 2.2483, "step": 7958 }, { "epoch": 1.06, "grad_norm": 0.283203125, "learning_rate": 4.5019698612530324e-05, "loss": 2.254, "step": 7959 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.501843078722499e-05, "loss": 2.2389, "step": 7960 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.50171628184231e-05, "loss": 2.2666, "step": 7961 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.501589470613373e-05, "loss": 2.2866, "step": 7962 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.5014626450365973e-05, "loss": 2.2829, "step": 7963 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.501335805112893e-05, "loss": 2.2432, "step": 7964 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.501208950843169e-05, "loss": 2.2248, "step": 7965 }, { "epoch": 1.06, "grad_norm": 0.27734375, "learning_rate": 4.5010820822283337e-05, "loss": 2.2478, "step": 7966 }, { "epoch": 1.06, "grad_norm": 0.27734375, "learning_rate": 4.5009551992692984e-05, "loss": 2.2302, "step": 7967 }, { "epoch": 1.06, "grad_norm": 0.2890625, "learning_rate": 4.5008283019669705e-05, "loss": 2.259, "step": 7968 }, { "epoch": 1.06, "grad_norm": 0.314453125, "learning_rate": 4.5007013903222606e-05, "loss": 2.2593, "step": 7969 }, { "epoch": 1.06, "grad_norm": 0.302734375, "learning_rate": 4.5005744643360794e-05, "loss": 2.2557, "step": 7970 }, { "epoch": 1.06, "grad_norm": 0.3046875, "learning_rate": 4.500447524009335e-05, "loss": 2.272, "step": 7971 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.5003205693429374e-05, "loss": 2.2199, "step": 7972 }, { "epoch": 1.06, "grad_norm": 0.298828125, "learning_rate": 4.500193600337797e-05, "loss": 2.2656, "step": 7973 }, { "epoch": 1.06, "grad_norm": 0.29296875, "learning_rate": 4.5000666169948255e-05, "loss": 2.2641, "step": 7974 }, { "epoch": 1.06, "grad_norm": 0.287109375, "learning_rate": 4.499939619314933e-05, "loss": 2.2382, "step": 7975 }, { "epoch": 1.06, "grad_norm": 0.28125, "learning_rate": 4.499812607299026e-05, "loss": 2.2705, "step": 7976 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.4996855809480195e-05, "loss": 2.2097, "step": 7977 }, { "epoch": 1.06, "grad_norm": 0.30078125, "learning_rate": 4.499558540262821e-05, "loss": 2.2512, "step": 7978 }, { "epoch": 1.06, "grad_norm": 0.283203125, "learning_rate": 4.499431485244344e-05, "loss": 2.2447, "step": 7979 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.499304415893496e-05, "loss": 2.2658, "step": 7980 }, { "epoch": 1.06, "grad_norm": 0.279296875, "learning_rate": 4.499177332211191e-05, "loss": 2.2772, "step": 7981 }, { "epoch": 1.06, "grad_norm": 0.296875, "learning_rate": 4.4990502341983374e-05, "loss": 2.2387, "step": 7982 }, { "epoch": 1.06, "grad_norm": 0.291015625, "learning_rate": 4.498923121855847e-05, "loss": 2.2365, "step": 7983 }, { "epoch": 1.06, "grad_norm": 0.294921875, "learning_rate": 4.498795995184633e-05, "loss": 2.2564, "step": 7984 }, { "epoch": 1.07, "grad_norm": 0.29296875, "learning_rate": 4.498668854185604e-05, "loss": 2.2225, "step": 7985 }, { "epoch": 1.07, "grad_norm": 0.28515625, "learning_rate": 4.498541698859672e-05, "loss": 2.2565, "step": 7986 }, { "epoch": 1.07, "grad_norm": 0.27734375, "learning_rate": 4.49841452920775e-05, "loss": 2.2276, "step": 7987 }, { "epoch": 1.07, "grad_norm": 0.28515625, "learning_rate": 4.4982873452307475e-05, "loss": 2.2566, "step": 7988 }, { "epoch": 1.07, "grad_norm": 0.291015625, "learning_rate": 4.4981601469295764e-05, "loss": 2.2444, "step": 7989 }, { "epoch": 1.07, "grad_norm": 0.291015625, "learning_rate": 4.49803293430515e-05, "loss": 2.2452, "step": 7990 }, { "epoch": 1.07, "grad_norm": 0.287109375, "learning_rate": 4.49790570735838e-05, "loss": 2.258, "step": 7991 }, { "epoch": 1.07, "grad_norm": 0.27734375, "learning_rate": 4.4977784660901765e-05, "loss": 2.2637, "step": 7992 }, { "epoch": 1.07, "grad_norm": 0.28515625, "learning_rate": 4.497651210501454e-05, "loss": 2.2626, "step": 7993 }, { "epoch": 1.07, "grad_norm": 0.291015625, "learning_rate": 4.497523940593123e-05, "loss": 2.2635, "step": 7994 }, { "epoch": 1.07, "grad_norm": 0.296875, "learning_rate": 4.497396656366096e-05, "loss": 2.2504, "step": 7995 }, { "epoch": 1.07, "grad_norm": 0.283203125, "learning_rate": 4.4972693578212864e-05, "loss": 2.2535, "step": 7996 }, { "epoch": 1.07, "grad_norm": 0.30078125, "learning_rate": 4.4971420449596065e-05, "loss": 2.2609, "step": 7997 }, { "epoch": 1.07, "grad_norm": 0.29296875, "learning_rate": 4.497014717781968e-05, "loss": 2.2651, "step": 7998 }, { "epoch": 1.07, "grad_norm": 0.298828125, "learning_rate": 4.4968873762892836e-05, "loss": 2.2742, "step": 7999 }, { "epoch": 1.07, "grad_norm": 0.302734375, "learning_rate": 4.496760020482467e-05, "loss": 2.2452, "step": 8000 }, { "epoch": 1.07, "eval_loss": 2.2556815147399902, "eval_runtime": 613.8572, "eval_samples_per_second": 63.16, "eval_steps_per_second": 7.896, "step": 8000 } ], "logging_steps": 1, "max_steps": 37480, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 1000, "total_flos": 1.0322360729534988e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }