{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9994141769185706,
"eval_steps": 500,
"global_step": 853,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0023432923257176333,
"grad_norm": 0.00011052378977183253,
"learning_rate": 5e-06,
"loss": 0.0,
"step": 2
},
{
"epoch": 0.0046865846514352666,
"grad_norm": 0.00020697808940894902,
"learning_rate": 4.9941245593419514e-06,
"loss": 0.0,
"step": 4
},
{
"epoch": 0.007029876977152899,
"grad_norm": 0.0012532881228253245,
"learning_rate": 4.982373678025853e-06,
"loss": 0.0,
"step": 6
},
{
"epoch": 0.009373169302870533,
"grad_norm": 0.0008086035377345979,
"learning_rate": 4.970622796709754e-06,
"loss": 0.0,
"step": 8
},
{
"epoch": 0.011716461628588167,
"grad_norm": 0.0021155672147870064,
"learning_rate": 4.958871915393655e-06,
"loss": 0.0,
"step": 10
},
{
"epoch": 0.014059753954305799,
"grad_norm": 0.0012233309680595994,
"learning_rate": 4.947121034077556e-06,
"loss": 0.0,
"step": 12
},
{
"epoch": 0.016403046280023433,
"grad_norm": 0.0027737286873161793,
"learning_rate": 4.9353701527614576e-06,
"loss": 0.0,
"step": 14
},
{
"epoch": 0.018746338605741066,
"grad_norm": 0.0042906939052045345,
"learning_rate": 4.923619271445359e-06,
"loss": 0.0,
"step": 16
},
{
"epoch": 0.0210896309314587,
"grad_norm": 0.0005172386299818754,
"learning_rate": 4.91186839012926e-06,
"loss": 0.0,
"step": 18
},
{
"epoch": 0.023432923257176334,
"grad_norm": 0.002410772955045104,
"learning_rate": 4.900117508813161e-06,
"loss": 0.0,
"step": 20
},
{
"epoch": 0.025776215582893967,
"grad_norm": 0.6443753242492676,
"learning_rate": 4.8883666274970625e-06,
"loss": 0.0027,
"step": 22
},
{
"epoch": 0.028119507908611598,
"grad_norm": 0.004394118674099445,
"learning_rate": 4.876615746180964e-06,
"loss": 0.0001,
"step": 24
},
{
"epoch": 0.03046280023432923,
"grad_norm": 0.006466630846261978,
"learning_rate": 4.864864864864866e-06,
"loss": 0.0001,
"step": 26
},
{
"epoch": 0.032806092560046865,
"grad_norm": 0.011924203485250473,
"learning_rate": 4.853113983548767e-06,
"loss": 0.0001,
"step": 28
},
{
"epoch": 0.0351493848857645,
"grad_norm": 0.23746930062770844,
"learning_rate": 4.841363102232668e-06,
"loss": 0.0001,
"step": 30
},
{
"epoch": 0.03749267721148213,
"grad_norm": 0.0031001348979771137,
"learning_rate": 4.8296122209165694e-06,
"loss": 0.0,
"step": 32
},
{
"epoch": 0.03983596953719976,
"grad_norm": 0.0029028633143752813,
"learning_rate": 4.817861339600471e-06,
"loss": 0.0,
"step": 34
},
{
"epoch": 0.0421792618629174,
"grad_norm": 0.014626468531787395,
"learning_rate": 4.806110458284372e-06,
"loss": 0.0001,
"step": 36
},
{
"epoch": 0.04452255418863503,
"grad_norm": 0.001155451056547463,
"learning_rate": 4.794359576968273e-06,
"loss": 0.0,
"step": 38
},
{
"epoch": 0.04686584651435267,
"grad_norm": 0.003476829966530204,
"learning_rate": 4.782608695652174e-06,
"loss": 0.0,
"step": 40
},
{
"epoch": 0.0492091388400703,
"grad_norm": 0.0002227002551080659,
"learning_rate": 4.7708578143360756e-06,
"loss": 0.0,
"step": 42
},
{
"epoch": 0.051552431165787935,
"grad_norm": 0.0001427282695658505,
"learning_rate": 4.759106933019977e-06,
"loss": 0.0,
"step": 44
},
{
"epoch": 0.053895723491505565,
"grad_norm": 0.0027408564928919077,
"learning_rate": 4.747356051703878e-06,
"loss": 0.0002,
"step": 46
},
{
"epoch": 0.056239015817223195,
"grad_norm": 0.0020253027323633432,
"learning_rate": 4.735605170387779e-06,
"loss": 0.0,
"step": 48
},
{
"epoch": 0.05858230814294083,
"grad_norm": 0.001760220737196505,
"learning_rate": 4.723854289071681e-06,
"loss": 0.0,
"step": 50
},
{
"epoch": 0.06092560046865846,
"grad_norm": 0.0010492791188880801,
"learning_rate": 4.7121034077555825e-06,
"loss": 0.0,
"step": 52
},
{
"epoch": 0.0632688927943761,
"grad_norm": 0.002001305343583226,
"learning_rate": 4.700352526439484e-06,
"loss": 0.0,
"step": 54
},
{
"epoch": 0.06561218512009373,
"grad_norm": 0.18566887080669403,
"learning_rate": 4.688601645123384e-06,
"loss": 0.0009,
"step": 56
},
{
"epoch": 0.06795547744581136,
"grad_norm": 0.0009072807151824236,
"learning_rate": 4.676850763807285e-06,
"loss": 0.0,
"step": 58
},
{
"epoch": 0.070298769771529,
"grad_norm": 0.003983665257692337,
"learning_rate": 4.665099882491187e-06,
"loss": 0.0006,
"step": 60
},
{
"epoch": 0.07264206209724663,
"grad_norm": 0.01946200616657734,
"learning_rate": 4.653349001175089e-06,
"loss": 0.0001,
"step": 62
},
{
"epoch": 0.07498535442296426,
"grad_norm": 0.004048655740916729,
"learning_rate": 4.64159811985899e-06,
"loss": 0.0,
"step": 64
},
{
"epoch": 0.0773286467486819,
"grad_norm": 0.0005872617475688457,
"learning_rate": 4.629847238542891e-06,
"loss": 0.0001,
"step": 66
},
{
"epoch": 0.07967193907439953,
"grad_norm": 0.008831903338432312,
"learning_rate": 4.618096357226792e-06,
"loss": 0.0001,
"step": 68
},
{
"epoch": 0.08201523140011717,
"grad_norm": 0.006819219794124365,
"learning_rate": 4.6063454759106936e-06,
"loss": 0.0001,
"step": 70
},
{
"epoch": 0.0843585237258348,
"grad_norm": 0.0007863900391384959,
"learning_rate": 4.594594594594596e-06,
"loss": 0.0,
"step": 72
},
{
"epoch": 0.08670181605155243,
"grad_norm": 0.032210394740104675,
"learning_rate": 4.582843713278496e-06,
"loss": 0.0001,
"step": 74
},
{
"epoch": 0.08904510837727006,
"grad_norm": 0.2614983916282654,
"learning_rate": 4.571092831962397e-06,
"loss": 0.0008,
"step": 76
},
{
"epoch": 0.0913884007029877,
"grad_norm": 0.0012551415711641312,
"learning_rate": 4.5593419506462985e-06,
"loss": 0.0,
"step": 78
},
{
"epoch": 0.09373169302870533,
"grad_norm": 0.0019108065171167254,
"learning_rate": 4.5475910693302e-06,
"loss": 0.0,
"step": 80
},
{
"epoch": 0.09607498535442296,
"grad_norm": 0.02294810675084591,
"learning_rate": 4.535840188014101e-06,
"loss": 0.0001,
"step": 82
},
{
"epoch": 0.0984182776801406,
"grad_norm": 0.0012388118775561452,
"learning_rate": 4.524089306698003e-06,
"loss": 0.0,
"step": 84
},
{
"epoch": 0.10076157000585823,
"grad_norm": 0.001227575121447444,
"learning_rate": 4.512338425381904e-06,
"loss": 0.0001,
"step": 86
},
{
"epoch": 0.10310486233157587,
"grad_norm": 0.004755712114274502,
"learning_rate": 4.5005875440658054e-06,
"loss": 0.0001,
"step": 88
},
{
"epoch": 0.1054481546572935,
"grad_norm": 0.00837083999067545,
"learning_rate": 4.488836662749707e-06,
"loss": 0.0001,
"step": 90
},
{
"epoch": 0.10779144698301113,
"grad_norm": 0.48219314217567444,
"learning_rate": 4.477085781433608e-06,
"loss": 0.0017,
"step": 92
},
{
"epoch": 0.11013473930872876,
"grad_norm": 0.022060217335820198,
"learning_rate": 4.465334900117509e-06,
"loss": 0.0001,
"step": 94
},
{
"epoch": 0.11247803163444639,
"grad_norm": 0.0019385352497920394,
"learning_rate": 4.45358401880141e-06,
"loss": 0.0,
"step": 96
},
{
"epoch": 0.11482132396016403,
"grad_norm": 0.01225442998111248,
"learning_rate": 4.4418331374853116e-06,
"loss": 0.0001,
"step": 98
},
{
"epoch": 0.11716461628588166,
"grad_norm": 0.0005759520572610199,
"learning_rate": 4.430082256169213e-06,
"loss": 0.0,
"step": 100
},
{
"epoch": 0.1195079086115993,
"grad_norm": 0.02452813647687435,
"learning_rate": 4.418331374853114e-06,
"loss": 0.0001,
"step": 102
},
{
"epoch": 0.12185120093731693,
"grad_norm": 0.0078084710985422134,
"learning_rate": 4.406580493537015e-06,
"loss": 0.0001,
"step": 104
},
{
"epoch": 0.12419449326303457,
"grad_norm": 0.004263446666300297,
"learning_rate": 4.394829612220917e-06,
"loss": 0.0001,
"step": 106
},
{
"epoch": 0.1265377855887522,
"grad_norm": 0.0016304058954119682,
"learning_rate": 4.3830787309048185e-06,
"loss": 0.0001,
"step": 108
},
{
"epoch": 0.12888107791446984,
"grad_norm": 0.011672005988657475,
"learning_rate": 4.37132784958872e-06,
"loss": 0.0002,
"step": 110
},
{
"epoch": 0.13122437024018746,
"grad_norm": 0.002603155327960849,
"learning_rate": 4.359576968272621e-06,
"loss": 0.0,
"step": 112
},
{
"epoch": 0.1335676625659051,
"grad_norm": 0.005059251096099615,
"learning_rate": 4.347826086956522e-06,
"loss": 0.0001,
"step": 114
},
{
"epoch": 0.13591095489162272,
"grad_norm": 0.0005816388293169439,
"learning_rate": 4.3360752056404234e-06,
"loss": 0.0001,
"step": 116
},
{
"epoch": 0.13825424721734036,
"grad_norm": 0.019756818190217018,
"learning_rate": 4.324324324324325e-06,
"loss": 0.0001,
"step": 118
},
{
"epoch": 0.140597539543058,
"grad_norm": 0.0023519208189100027,
"learning_rate": 4.312573443008226e-06,
"loss": 0.0,
"step": 120
},
{
"epoch": 0.14294083186877563,
"grad_norm": 0.0028086318634450436,
"learning_rate": 4.300822561692127e-06,
"loss": 0.0,
"step": 122
},
{
"epoch": 0.14528412419449327,
"grad_norm": 0.0022307527251541615,
"learning_rate": 4.289071680376028e-06,
"loss": 0.0,
"step": 124
},
{
"epoch": 0.14762741652021089,
"grad_norm": 0.014247684739530087,
"learning_rate": 4.2773207990599296e-06,
"loss": 0.0001,
"step": 126
},
{
"epoch": 0.14997070884592853,
"grad_norm": 0.00011139630805701017,
"learning_rate": 4.265569917743831e-06,
"loss": 0.0,
"step": 128
},
{
"epoch": 0.15231400117164617,
"grad_norm": 0.000514341751113534,
"learning_rate": 4.253819036427733e-06,
"loss": 0.0,
"step": 130
},
{
"epoch": 0.1546572934973638,
"grad_norm": 0.002176255453377962,
"learning_rate": 4.242068155111634e-06,
"loss": 0.0001,
"step": 132
},
{
"epoch": 0.15700058582308143,
"grad_norm": 0.018497969955205917,
"learning_rate": 4.230317273795535e-06,
"loss": 0.0001,
"step": 134
},
{
"epoch": 0.15934387814879905,
"grad_norm": 0.013157431036233902,
"learning_rate": 4.2185663924794365e-06,
"loss": 0.0001,
"step": 136
},
{
"epoch": 0.1616871704745167,
"grad_norm": 0.007630129344761372,
"learning_rate": 4.206815511163338e-06,
"loss": 0.0,
"step": 138
},
{
"epoch": 0.16403046280023434,
"grad_norm": 0.0008055138750933111,
"learning_rate": 4.195064629847239e-06,
"loss": 0.0001,
"step": 140
},
{
"epoch": 0.16637375512595196,
"grad_norm": 0.006306421477347612,
"learning_rate": 4.18331374853114e-06,
"loss": 0.0,
"step": 142
},
{
"epoch": 0.1687170474516696,
"grad_norm": 0.020266445353627205,
"learning_rate": 4.1715628672150414e-06,
"loss": 0.0001,
"step": 144
},
{
"epoch": 0.17106033977738722,
"grad_norm": 0.00037427974166348577,
"learning_rate": 4.159811985898943e-06,
"loss": 0.0,
"step": 146
},
{
"epoch": 0.17340363210310486,
"grad_norm": 0.004259356763213873,
"learning_rate": 4.148061104582844e-06,
"loss": 0.0001,
"step": 148
},
{
"epoch": 0.1757469244288225,
"grad_norm": 0.0010232679778710008,
"learning_rate": 4.136310223266745e-06,
"loss": 0.0001,
"step": 150
},
{
"epoch": 0.17809021675454012,
"grad_norm": 0.003952402155846357,
"learning_rate": 4.124559341950647e-06,
"loss": 0.0,
"step": 152
},
{
"epoch": 0.18043350908025776,
"grad_norm": 0.0013295585522428155,
"learning_rate": 4.112808460634548e-06,
"loss": 0.0,
"step": 154
},
{
"epoch": 0.1827768014059754,
"grad_norm": 0.013831949792802334,
"learning_rate": 4.10105757931845e-06,
"loss": 0.0001,
"step": 156
},
{
"epoch": 0.18512009373169303,
"grad_norm": 0.0036904062144458294,
"learning_rate": 4.089306698002351e-06,
"loss": 0.0,
"step": 158
},
{
"epoch": 0.18746338605741067,
"grad_norm": 0.002993196714669466,
"learning_rate": 4.077555816686252e-06,
"loss": 0.0,
"step": 160
},
{
"epoch": 0.18980667838312829,
"grad_norm": 0.0016740068094804883,
"learning_rate": 4.0658049353701525e-06,
"loss": 0.0001,
"step": 162
},
{
"epoch": 0.19214997070884593,
"grad_norm": 0.012307717464864254,
"learning_rate": 4.0540540540540545e-06,
"loss": 0.0001,
"step": 164
},
{
"epoch": 0.19449326303456357,
"grad_norm": 0.0012654109159484506,
"learning_rate": 4.042303172737956e-06,
"loss": 0.0,
"step": 166
},
{
"epoch": 0.1968365553602812,
"grad_norm": 0.12437883019447327,
"learning_rate": 4.030552291421857e-06,
"loss": 0.0006,
"step": 168
},
{
"epoch": 0.19917984768599883,
"grad_norm": 8.974138472694904e-05,
"learning_rate": 4.018801410105758e-06,
"loss": 0.0,
"step": 170
},
{
"epoch": 0.20152314001171645,
"grad_norm": 0.0011903212871402502,
"learning_rate": 4.007050528789659e-06,
"loss": 0.0001,
"step": 172
},
{
"epoch": 0.2038664323374341,
"grad_norm": 0.012350277975201607,
"learning_rate": 3.995299647473561e-06,
"loss": 0.0001,
"step": 174
},
{
"epoch": 0.20620972466315174,
"grad_norm": 0.01664598099887371,
"learning_rate": 3.983548766157463e-06,
"loss": 0.0001,
"step": 176
},
{
"epoch": 0.20855301698886936,
"grad_norm": 0.0064240009523928165,
"learning_rate": 3.971797884841364e-06,
"loss": 0.0001,
"step": 178
},
{
"epoch": 0.210896309314587,
"grad_norm": 0.0031362581066787243,
"learning_rate": 3.960047003525264e-06,
"loss": 0.0,
"step": 180
},
{
"epoch": 0.21323960164030462,
"grad_norm": 0.00012566300574690104,
"learning_rate": 3.9482961222091655e-06,
"loss": 0.0001,
"step": 182
},
{
"epoch": 0.21558289396602226,
"grad_norm": 0.0018261070363223553,
"learning_rate": 3.936545240893067e-06,
"loss": 0.0,
"step": 184
},
{
"epoch": 0.2179261862917399,
"grad_norm": 0.0010897299507632852,
"learning_rate": 3.924794359576969e-06,
"loss": 0.0,
"step": 186
},
{
"epoch": 0.22026947861745752,
"grad_norm": 0.006528445053845644,
"learning_rate": 3.91304347826087e-06,
"loss": 0.0,
"step": 188
},
{
"epoch": 0.22261277094317516,
"grad_norm": 0.4626096785068512,
"learning_rate": 3.901292596944771e-06,
"loss": 0.0009,
"step": 190
},
{
"epoch": 0.22495606326889278,
"grad_norm": 0.002359338803216815,
"learning_rate": 3.8895417156286725e-06,
"loss": 0.0,
"step": 192
},
{
"epoch": 0.22729935559461042,
"grad_norm": 0.004821418318897486,
"learning_rate": 3.877790834312574e-06,
"loss": 0.0,
"step": 194
},
{
"epoch": 0.22964264792032807,
"grad_norm": 0.0011465001152828336,
"learning_rate": 3.866039952996475e-06,
"loss": 0.0008,
"step": 196
},
{
"epoch": 0.23198594024604569,
"grad_norm": 0.0007381247123703361,
"learning_rate": 3.854289071680376e-06,
"loss": 0.0001,
"step": 198
},
{
"epoch": 0.23432923257176333,
"grad_norm": 0.0023091183975338936,
"learning_rate": 3.842538190364277e-06,
"loss": 0.0,
"step": 200
},
{
"epoch": 0.23667252489748097,
"grad_norm": 0.0005714365397579968,
"learning_rate": 3.830787309048179e-06,
"loss": 0.0,
"step": 202
},
{
"epoch": 0.2390158172231986,
"grad_norm": 0.00351692084223032,
"learning_rate": 3.81903642773208e-06,
"loss": 0.0,
"step": 204
},
{
"epoch": 0.24135910954891623,
"grad_norm": 5.926425728830509e-05,
"learning_rate": 3.8072855464159815e-06,
"loss": 0.0,
"step": 206
},
{
"epoch": 0.24370240187463385,
"grad_norm": 0.0016421001637354493,
"learning_rate": 3.7955346650998827e-06,
"loss": 0.0,
"step": 208
},
{
"epoch": 0.2460456942003515,
"grad_norm": 0.012118808925151825,
"learning_rate": 3.7837837837837844e-06,
"loss": 0.0001,
"step": 210
},
{
"epoch": 0.24838898652606914,
"grad_norm": 0.00024874648079276085,
"learning_rate": 3.7720329024676856e-06,
"loss": 0.0002,
"step": 212
},
{
"epoch": 0.2507322788517868,
"grad_norm": 0.0017625248292461038,
"learning_rate": 3.760282021151587e-06,
"loss": 0.0,
"step": 214
},
{
"epoch": 0.2530755711775044,
"grad_norm": 0.0007431196281686425,
"learning_rate": 3.748531139835488e-06,
"loss": 0.0,
"step": 216
},
{
"epoch": 0.255418863503222,
"grad_norm": 0.0007026457460597157,
"learning_rate": 3.7367802585193893e-06,
"loss": 0.0,
"step": 218
},
{
"epoch": 0.2577621558289397,
"grad_norm": 0.002397920237854123,
"learning_rate": 3.72502937720329e-06,
"loss": 0.0,
"step": 220
},
{
"epoch": 0.2601054481546573,
"grad_norm": 0.003177257487550378,
"learning_rate": 3.713278495887192e-06,
"loss": 0.0,
"step": 222
},
{
"epoch": 0.2624487404803749,
"grad_norm": 0.003142025787383318,
"learning_rate": 3.7015276145710934e-06,
"loss": 0.0001,
"step": 224
},
{
"epoch": 0.26479203280609254,
"grad_norm": 0.03788410872220993,
"learning_rate": 3.6897767332549946e-06,
"loss": 0.0002,
"step": 226
},
{
"epoch": 0.2671353251318102,
"grad_norm": 0.005685464479029179,
"learning_rate": 3.6780258519388954e-06,
"loss": 0.0003,
"step": 228
},
{
"epoch": 0.2694786174575278,
"grad_norm": 0.0010328789940103889,
"learning_rate": 3.6662749706227966e-06,
"loss": 0.0003,
"step": 230
},
{
"epoch": 0.27182190978324544,
"grad_norm": 0.0052024442702531815,
"learning_rate": 3.6545240893066987e-06,
"loss": 0.0,
"step": 232
},
{
"epoch": 0.2741652021089631,
"grad_norm": 0.006033598445355892,
"learning_rate": 3.6427732079906e-06,
"loss": 0.0,
"step": 234
},
{
"epoch": 0.27650849443468073,
"grad_norm": 0.00023948443413246423,
"learning_rate": 3.6310223266745007e-06,
"loss": 0.0001,
"step": 236
},
{
"epoch": 0.27885178676039835,
"grad_norm": 0.00016467843670397997,
"learning_rate": 3.619271445358402e-06,
"loss": 0.0,
"step": 238
},
{
"epoch": 0.281195079086116,
"grad_norm": 0.003566320287063718,
"learning_rate": 3.607520564042303e-06,
"loss": 0.0,
"step": 240
},
{
"epoch": 0.28353837141183363,
"grad_norm": 0.00033969045034609735,
"learning_rate": 3.5957696827262044e-06,
"loss": 0.0,
"step": 242
},
{
"epoch": 0.28588166373755125,
"grad_norm": 0.0033994223922491074,
"learning_rate": 3.5840188014101065e-06,
"loss": 0.0,
"step": 244
},
{
"epoch": 0.28822495606326887,
"grad_norm": 0.14746786653995514,
"learning_rate": 3.5722679200940073e-06,
"loss": 0.0008,
"step": 246
},
{
"epoch": 0.29056824838898654,
"grad_norm": 0.012470235116779804,
"learning_rate": 3.5605170387779085e-06,
"loss": 0.0,
"step": 248
},
{
"epoch": 0.29291154071470415,
"grad_norm": 0.08307931572198868,
"learning_rate": 3.5487661574618097e-06,
"loss": 0.0003,
"step": 250
},
{
"epoch": 0.29525483304042177,
"grad_norm": 0.00033245363738387823,
"learning_rate": 3.537015276145711e-06,
"loss": 0.0,
"step": 252
},
{
"epoch": 0.29759812536613944,
"grad_norm": 0.0018247144762426615,
"learning_rate": 3.525264394829612e-06,
"loss": 0.0,
"step": 254
},
{
"epoch": 0.29994141769185706,
"grad_norm": 0.0011103990254923701,
"learning_rate": 3.513513513513514e-06,
"loss": 0.0001,
"step": 256
},
{
"epoch": 0.3022847100175747,
"grad_norm": 0.0010811882093548775,
"learning_rate": 3.501762632197415e-06,
"loss": 0.0,
"step": 258
},
{
"epoch": 0.30462800234329235,
"grad_norm": 0.011172047816216946,
"learning_rate": 3.4900117508813163e-06,
"loss": 0.0001,
"step": 260
},
{
"epoch": 0.30697129466900996,
"grad_norm": 0.0013676233356818557,
"learning_rate": 3.4782608695652175e-06,
"loss": 0.0,
"step": 262
},
{
"epoch": 0.3093145869947276,
"grad_norm": 0.002147970488294959,
"learning_rate": 3.4665099882491187e-06,
"loss": 0.0,
"step": 264
},
{
"epoch": 0.31165787932044525,
"grad_norm": 0.0009826518362388015,
"learning_rate": 3.4547591069330204e-06,
"loss": 0.0,
"step": 266
},
{
"epoch": 0.31400117164616287,
"grad_norm": 0.001499099307693541,
"learning_rate": 3.4430082256169216e-06,
"loss": 0.0,
"step": 268
},
{
"epoch": 0.3163444639718805,
"grad_norm": 0.001323301112279296,
"learning_rate": 3.431257344300823e-06,
"loss": 0.0,
"step": 270
},
{
"epoch": 0.3186877562975981,
"grad_norm": 0.018010340631008148,
"learning_rate": 3.419506462984724e-06,
"loss": 0.0005,
"step": 272
},
{
"epoch": 0.3210310486233158,
"grad_norm": 0.0024064648896455765,
"learning_rate": 3.4077555816686253e-06,
"loss": 0.0,
"step": 274
},
{
"epoch": 0.3233743409490334,
"grad_norm": 0.02396260015666485,
"learning_rate": 3.3960047003525265e-06,
"loss": 0.0001,
"step": 276
},
{
"epoch": 0.325717633274751,
"grad_norm": 0.002070352202281356,
"learning_rate": 3.384253819036428e-06,
"loss": 0.0,
"step": 278
},
{
"epoch": 0.3280609256004687,
"grad_norm": 0.0003108434902969748,
"learning_rate": 3.3725029377203294e-06,
"loss": 0.0001,
"step": 280
},
{
"epoch": 0.3304042179261863,
"grad_norm": 0.006573045626282692,
"learning_rate": 3.3607520564042306e-06,
"loss": 0.0001,
"step": 282
},
{
"epoch": 0.3327475102519039,
"grad_norm": 0.0004413512069731951,
"learning_rate": 3.349001175088132e-06,
"loss": 0.0001,
"step": 284
},
{
"epoch": 0.3350908025776216,
"grad_norm": 0.0005645502242259681,
"learning_rate": 3.337250293772033e-06,
"loss": 0.0,
"step": 286
},
{
"epoch": 0.3374340949033392,
"grad_norm": 0.00034579774364829063,
"learning_rate": 3.3254994124559343e-06,
"loss": 0.0,
"step": 288
},
{
"epoch": 0.3397773872290568,
"grad_norm": 0.003136229468509555,
"learning_rate": 3.313748531139836e-06,
"loss": 0.0,
"step": 290
},
{
"epoch": 0.34212067955477443,
"grad_norm": 0.0031148705165833235,
"learning_rate": 3.301997649823737e-06,
"loss": 0.0,
"step": 292
},
{
"epoch": 0.3444639718804921,
"grad_norm": 0.0012612566351890564,
"learning_rate": 3.2902467685076384e-06,
"loss": 0.0,
"step": 294
},
{
"epoch": 0.3468072642062097,
"grad_norm": 0.0007469533011317253,
"learning_rate": 3.2784958871915396e-06,
"loss": 0.0,
"step": 296
},
{
"epoch": 0.34915055653192734,
"grad_norm": 0.04412250965833664,
"learning_rate": 3.266745005875441e-06,
"loss": 0.0003,
"step": 298
},
{
"epoch": 0.351493848857645,
"grad_norm": 0.004462533164769411,
"learning_rate": 3.2549941245593425e-06,
"loss": 0.0088,
"step": 300
},
{
"epoch": 0.3538371411833626,
"grad_norm": 0.002911294111981988,
"learning_rate": 3.2432432432432437e-06,
"loss": 0.0006,
"step": 302
},
{
"epoch": 0.35618043350908024,
"grad_norm": 0.0015191801358014345,
"learning_rate": 3.231492361927145e-06,
"loss": 0.0,
"step": 304
},
{
"epoch": 0.3585237258347979,
"grad_norm": 0.017380721867084503,
"learning_rate": 3.219741480611046e-06,
"loss": 0.0094,
"step": 306
},
{
"epoch": 0.36086701816051553,
"grad_norm": 0.002749436302110553,
"learning_rate": 3.2079905992949474e-06,
"loss": 0.0001,
"step": 308
},
{
"epoch": 0.36321031048623315,
"grad_norm": 0.0008673086995258927,
"learning_rate": 3.1962397179788486e-06,
"loss": 0.0,
"step": 310
},
{
"epoch": 0.3655536028119508,
"grad_norm": 0.00361701101064682,
"learning_rate": 3.1844888366627503e-06,
"loss": 0.0,
"step": 312
},
{
"epoch": 0.36789689513766843,
"grad_norm": 0.006906528025865555,
"learning_rate": 3.1727379553466515e-06,
"loss": 0.0,
"step": 314
},
{
"epoch": 0.37024018746338605,
"grad_norm": 2.259305238723755,
"learning_rate": 3.1609870740305527e-06,
"loss": 0.0157,
"step": 316
},
{
"epoch": 0.37258347978910367,
"grad_norm": 0.00017454673070460558,
"learning_rate": 3.149236192714454e-06,
"loss": 0.0,
"step": 318
},
{
"epoch": 0.37492677211482134,
"grad_norm": 0.16197967529296875,
"learning_rate": 3.137485311398355e-06,
"loss": 0.0009,
"step": 320
},
{
"epoch": 0.37727006444053895,
"grad_norm": 0.002247605938464403,
"learning_rate": 3.1257344300822564e-06,
"loss": 0.0,
"step": 322
},
{
"epoch": 0.37961335676625657,
"grad_norm": 0.023727795109152794,
"learning_rate": 3.113983548766158e-06,
"loss": 0.0001,
"step": 324
},
{
"epoch": 0.38195664909197424,
"grad_norm": 0.008455273695290089,
"learning_rate": 3.1022326674500592e-06,
"loss": 0.0001,
"step": 326
},
{
"epoch": 0.38429994141769186,
"grad_norm": 0.00022873218404129148,
"learning_rate": 3.0904817861339605e-06,
"loss": 0.0,
"step": 328
},
{
"epoch": 0.3866432337434095,
"grad_norm": 3.000872850418091,
"learning_rate": 3.0787309048178617e-06,
"loss": 0.055,
"step": 330
},
{
"epoch": 0.38898652606912715,
"grad_norm": 0.002177221467718482,
"learning_rate": 3.066980023501763e-06,
"loss": 0.0,
"step": 332
},
{
"epoch": 0.39132981839484476,
"grad_norm": 0.002786975121125579,
"learning_rate": 3.0552291421856637e-06,
"loss": 0.0,
"step": 334
},
{
"epoch": 0.3936731107205624,
"grad_norm": 0.004335256293416023,
"learning_rate": 3.043478260869566e-06,
"loss": 0.0,
"step": 336
},
{
"epoch": 0.39601640304628,
"grad_norm": 0.007627409417182207,
"learning_rate": 3.031727379553467e-06,
"loss": 0.0001,
"step": 338
},
{
"epoch": 0.39835969537199767,
"grad_norm": 0.002631911775097251,
"learning_rate": 3.0199764982373682e-06,
"loss": 0.0,
"step": 340
},
{
"epoch": 0.4007029876977153,
"grad_norm": 0.009561799466609955,
"learning_rate": 3.008225616921269e-06,
"loss": 0.0001,
"step": 342
},
{
"epoch": 0.4030462800234329,
"grad_norm": 0.0026635443791747093,
"learning_rate": 2.9964747356051703e-06,
"loss": 0.0001,
"step": 344
},
{
"epoch": 0.4053895723491506,
"grad_norm": 0.0001533351169200614,
"learning_rate": 2.9847238542890723e-06,
"loss": 0.0,
"step": 346
},
{
"epoch": 0.4077328646748682,
"grad_norm": 0.0835270956158638,
"learning_rate": 2.9729729729729736e-06,
"loss": 0.0005,
"step": 348
},
{
"epoch": 0.4100761570005858,
"grad_norm": 0.003761101048439741,
"learning_rate": 2.9612220916568744e-06,
"loss": 0.0,
"step": 350
},
{
"epoch": 0.4124194493263035,
"grad_norm": 0.01136633288115263,
"learning_rate": 2.9494712103407756e-06,
"loss": 0.0002,
"step": 352
},
{
"epoch": 0.4147627416520211,
"grad_norm": 0.007711971178650856,
"learning_rate": 2.937720329024677e-06,
"loss": 0.0001,
"step": 354
},
{
"epoch": 0.4171060339777387,
"grad_norm": 0.0003854953683912754,
"learning_rate": 2.925969447708578e-06,
"loss": 0.0,
"step": 356
},
{
"epoch": 0.4194493263034564,
"grad_norm": 0.019140860065817833,
"learning_rate": 2.91421856639248e-06,
"loss": 0.0001,
"step": 358
},
{
"epoch": 0.421792618629174,
"grad_norm": 0.0013410028768703341,
"learning_rate": 2.902467685076381e-06,
"loss": 0.0003,
"step": 360
},
{
"epoch": 0.4241359109548916,
"grad_norm": 0.0011243935441598296,
"learning_rate": 2.890716803760282e-06,
"loss": 0.0001,
"step": 362
},
{
"epoch": 0.42647920328060923,
"grad_norm": 0.012134709395468235,
"learning_rate": 2.8789659224441834e-06,
"loss": 0.0001,
"step": 364
},
{
"epoch": 0.4288224956063269,
"grad_norm": 0.0028234529308974743,
"learning_rate": 2.8672150411280846e-06,
"loss": 0.0,
"step": 366
},
{
"epoch": 0.4311657879320445,
"grad_norm": 0.004319467581808567,
"learning_rate": 2.855464159811986e-06,
"loss": 0.0,
"step": 368
},
{
"epoch": 0.43350908025776214,
"grad_norm": 0.0068093533627688885,
"learning_rate": 2.8437132784958875e-06,
"loss": 0.0001,
"step": 370
},
{
"epoch": 0.4358523725834798,
"grad_norm": 0.016774361953139305,
"learning_rate": 2.8319623971797887e-06,
"loss": 0.0001,
"step": 372
},
{
"epoch": 0.4381956649091974,
"grad_norm": 0.014978869818150997,
"learning_rate": 2.82021151586369e-06,
"loss": 0.0001,
"step": 374
},
{
"epoch": 0.44053895723491504,
"grad_norm": 0.0010881100315600634,
"learning_rate": 2.808460634547591e-06,
"loss": 0.0004,
"step": 376
},
{
"epoch": 0.4428822495606327,
"grad_norm": 0.05522293969988823,
"learning_rate": 2.7967097532314924e-06,
"loss": 0.0002,
"step": 378
},
{
"epoch": 0.44522554188635033,
"grad_norm": 0.0027575818821787834,
"learning_rate": 2.784958871915394e-06,
"loss": 0.0,
"step": 380
},
{
"epoch": 0.44756883421206795,
"grad_norm": 0.0006020054570399225,
"learning_rate": 2.7732079905992952e-06,
"loss": 0.0005,
"step": 382
},
{
"epoch": 0.44991212653778556,
"grad_norm": 0.0025616425555199385,
"learning_rate": 2.7614571092831965e-06,
"loss": 0.0,
"step": 384
},
{
"epoch": 0.45225541886350323,
"grad_norm": 0.0018823420396074653,
"learning_rate": 2.7497062279670977e-06,
"loss": 0.0,
"step": 386
},
{
"epoch": 0.45459871118922085,
"grad_norm": 0.003241207217797637,
"learning_rate": 2.737955346650999e-06,
"loss": 0.0,
"step": 388
},
{
"epoch": 0.45694200351493847,
"grad_norm": 0.0010485474485903978,
"learning_rate": 2.7262044653349e-06,
"loss": 0.0002,
"step": 390
},
{
"epoch": 0.45928529584065614,
"grad_norm": 0.013366922736167908,
"learning_rate": 2.714453584018802e-06,
"loss": 0.0001,
"step": 392
},
{
"epoch": 0.46162858816637375,
"grad_norm": 0.0005886501166969538,
"learning_rate": 2.702702702702703e-06,
"loss": 0.0,
"step": 394
},
{
"epoch": 0.46397188049209137,
"grad_norm": 7.603697304148227e-05,
"learning_rate": 2.6909518213866042e-06,
"loss": 0.0,
"step": 396
},
{
"epoch": 0.46631517281780904,
"grad_norm": 0.000614571908954531,
"learning_rate": 2.6792009400705055e-06,
"loss": 0.0023,
"step": 398
},
{
"epoch": 0.46865846514352666,
"grad_norm": 0.046423882246017456,
"learning_rate": 2.6674500587544067e-06,
"loss": 0.0002,
"step": 400
},
{
"epoch": 0.4710017574692443,
"grad_norm": 0.0005994020029902458,
"learning_rate": 2.655699177438308e-06,
"loss": 0.0,
"step": 402
},
{
"epoch": 0.47334504979496195,
"grad_norm": 0.011609828099608421,
"learning_rate": 2.6439482961222096e-06,
"loss": 0.0001,
"step": 404
},
{
"epoch": 0.47568834212067956,
"grad_norm": 0.007135775871574879,
"learning_rate": 2.632197414806111e-06,
"loss": 0.0002,
"step": 406
},
{
"epoch": 0.4780316344463972,
"grad_norm": 0.0028773818630725145,
"learning_rate": 2.620446533490012e-06,
"loss": 0.0,
"step": 408
},
{
"epoch": 0.4803749267721148,
"grad_norm": 0.13341404497623444,
"learning_rate": 2.6086956521739132e-06,
"loss": 0.0008,
"step": 410
},
{
"epoch": 0.48271821909783247,
"grad_norm": 0.03130058944225311,
"learning_rate": 2.5969447708578145e-06,
"loss": 0.0001,
"step": 412
},
{
"epoch": 0.4850615114235501,
"grad_norm": 0.006637818645685911,
"learning_rate": 2.5851938895417157e-06,
"loss": 0.0001,
"step": 414
},
{
"epoch": 0.4874048037492677,
"grad_norm": 0.0006390800117515028,
"learning_rate": 2.5734430082256173e-06,
"loss": 0.0001,
"step": 416
},
{
"epoch": 0.4897480960749854,
"grad_norm": 0.02106345072388649,
"learning_rate": 2.5616921269095186e-06,
"loss": 0.0002,
"step": 418
},
{
"epoch": 0.492091388400703,
"grad_norm": 0.0009213433368131518,
"learning_rate": 2.5499412455934198e-06,
"loss": 0.0001,
"step": 420
},
{
"epoch": 0.4944346807264206,
"grad_norm": 2.5962471961975098,
"learning_rate": 2.538190364277321e-06,
"loss": 0.1436,
"step": 422
},
{
"epoch": 0.4967779730521383,
"grad_norm": 0.009386847727000713,
"learning_rate": 2.5264394829612222e-06,
"loss": 0.0001,
"step": 424
},
{
"epoch": 0.4991212653778559,
"grad_norm": 0.01308267842978239,
"learning_rate": 2.514688601645124e-06,
"loss": 0.0001,
"step": 426
},
{
"epoch": 0.5014645577035736,
"grad_norm": 0.006409250665456057,
"learning_rate": 2.502937720329025e-06,
"loss": 0.0,
"step": 428
},
{
"epoch": 0.5038078500292912,
"grad_norm": 0.0018047624034807086,
"learning_rate": 2.4911868390129263e-06,
"loss": 0.0001,
"step": 430
},
{
"epoch": 0.5061511423550088,
"grad_norm": 0.007056268397718668,
"learning_rate": 2.4794359576968276e-06,
"loss": 0.0,
"step": 432
},
{
"epoch": 0.5084944346807264,
"grad_norm": 2.4651243686676025,
"learning_rate": 2.4676850763807288e-06,
"loss": 0.0245,
"step": 434
},
{
"epoch": 0.510837727006444,
"grad_norm": 0.0025760605931282043,
"learning_rate": 2.45593419506463e-06,
"loss": 0.0,
"step": 436
},
{
"epoch": 0.5131810193321616,
"grad_norm": 0.059660654515028,
"learning_rate": 2.4441833137485312e-06,
"loss": 0.0003,
"step": 438
},
{
"epoch": 0.5155243116578794,
"grad_norm": 0.032668206840753555,
"learning_rate": 2.432432432432433e-06,
"loss": 0.0002,
"step": 440
},
{
"epoch": 0.517867603983597,
"grad_norm": 0.002476097084581852,
"learning_rate": 2.420681551116334e-06,
"loss": 0.0,
"step": 442
},
{
"epoch": 0.5202108963093146,
"grad_norm": 0.0005356927285902202,
"learning_rate": 2.4089306698002353e-06,
"loss": 0.0,
"step": 444
},
{
"epoch": 0.5225541886350322,
"grad_norm": 0.01949264481663704,
"learning_rate": 2.3971797884841366e-06,
"loss": 0.0001,
"step": 446
},
{
"epoch": 0.5248974809607498,
"grad_norm": 0.4609091281890869,
"learning_rate": 2.3854289071680378e-06,
"loss": 0.0013,
"step": 448
},
{
"epoch": 0.5272407732864675,
"grad_norm": 0.002268969314172864,
"learning_rate": 2.373678025851939e-06,
"loss": 0.027,
"step": 450
},
{
"epoch": 0.5295840656121851,
"grad_norm": 0.42679542303085327,
"learning_rate": 2.3619271445358407e-06,
"loss": 0.002,
"step": 452
},
{
"epoch": 0.5319273579379028,
"grad_norm": 0.030775954946875572,
"learning_rate": 2.350176263219742e-06,
"loss": 0.0001,
"step": 454
},
{
"epoch": 0.5342706502636204,
"grad_norm": 0.006208465900272131,
"learning_rate": 2.3384253819036427e-06,
"loss": 0.0001,
"step": 456
},
{
"epoch": 0.536613942589338,
"grad_norm": 0.001203950378112495,
"learning_rate": 2.3266745005875443e-06,
"loss": 0.0,
"step": 458
},
{
"epoch": 0.5389572349150556,
"grad_norm": 0.0013062539510428905,
"learning_rate": 2.3149236192714456e-06,
"loss": 0.0001,
"step": 460
},
{
"epoch": 0.5413005272407733,
"grad_norm": 0.014242034405469894,
"learning_rate": 2.3031727379553468e-06,
"loss": 0.0001,
"step": 462
},
{
"epoch": 0.5436438195664909,
"grad_norm": 0.0024558689910918474,
"learning_rate": 2.291421856639248e-06,
"loss": 0.0,
"step": 464
},
{
"epoch": 0.5459871118922085,
"grad_norm": 0.006871205288916826,
"learning_rate": 2.2796709753231492e-06,
"loss": 0.0,
"step": 466
},
{
"epoch": 0.5483304042179262,
"grad_norm": 0.016744021326303482,
"learning_rate": 2.2679200940070505e-06,
"loss": 0.0001,
"step": 468
},
{
"epoch": 0.5506736965436438,
"grad_norm": 0.0025478950701653957,
"learning_rate": 2.256169212690952e-06,
"loss": 0.0,
"step": 470
},
{
"epoch": 0.5530169888693615,
"grad_norm": 0.002553507685661316,
"learning_rate": 2.2444183313748533e-06,
"loss": 0.0,
"step": 472
},
{
"epoch": 0.5553602811950791,
"grad_norm": 0.0018396044615656137,
"learning_rate": 2.2326674500587546e-06,
"loss": 0.0002,
"step": 474
},
{
"epoch": 0.5577035735207967,
"grad_norm": 0.002036860678344965,
"learning_rate": 2.2209165687426558e-06,
"loss": 0.0,
"step": 476
},
{
"epoch": 0.5600468658465143,
"grad_norm": 0.0024688418488949537,
"learning_rate": 2.209165687426557e-06,
"loss": 0.0,
"step": 478
},
{
"epoch": 0.562390158172232,
"grad_norm": 0.0028820293955504894,
"learning_rate": 2.1974148061104587e-06,
"loss": 0.0001,
"step": 480
},
{
"epoch": 0.5647334504979497,
"grad_norm": 0.00978305283933878,
"learning_rate": 2.18566392479436e-06,
"loss": 0.0001,
"step": 482
},
{
"epoch": 0.5670767428236673,
"grad_norm": 0.147267147898674,
"learning_rate": 2.173913043478261e-06,
"loss": 0.0014,
"step": 484
},
{
"epoch": 0.5694200351493849,
"grad_norm": 0.005025573540478945,
"learning_rate": 2.1621621621621623e-06,
"loss": 0.0006,
"step": 486
},
{
"epoch": 0.5717633274751025,
"grad_norm": 0.0010051846038550138,
"learning_rate": 2.1504112808460636e-06,
"loss": 0.0003,
"step": 488
},
{
"epoch": 0.5741066198008201,
"grad_norm": 0.009055075235664845,
"learning_rate": 2.1386603995299648e-06,
"loss": 0.0001,
"step": 490
},
{
"epoch": 0.5764499121265377,
"grad_norm": 0.0077414545230567455,
"learning_rate": 2.1269095182138664e-06,
"loss": 0.0001,
"step": 492
},
{
"epoch": 0.5787932044522555,
"grad_norm": 0.0059761228039860725,
"learning_rate": 2.1151586368977677e-06,
"loss": 0.0001,
"step": 494
},
{
"epoch": 0.5811364967779731,
"grad_norm": 0.0014180493308231235,
"learning_rate": 2.103407755581669e-06,
"loss": 0.0,
"step": 496
},
{
"epoch": 0.5834797891036907,
"grad_norm": 0.0022345769684761763,
"learning_rate": 2.09165687426557e-06,
"loss": 0.0,
"step": 498
},
{
"epoch": 0.5858230814294083,
"grad_norm": 0.005645833443850279,
"learning_rate": 2.0799059929494713e-06,
"loss": 0.0001,
"step": 500
},
{
"epoch": 0.5881663737551259,
"grad_norm": 0.011956258676946163,
"learning_rate": 2.0681551116333726e-06,
"loss": 0.0001,
"step": 502
},
{
"epoch": 0.5905096660808435,
"grad_norm": 0.01774289458990097,
"learning_rate": 2.056404230317274e-06,
"loss": 0.0002,
"step": 504
},
{
"epoch": 0.5928529584065613,
"grad_norm": 0.21751126646995544,
"learning_rate": 2.0446533490011754e-06,
"loss": 0.0012,
"step": 506
},
{
"epoch": 0.5951962507322789,
"grad_norm": 0.00307491235435009,
"learning_rate": 2.0329024676850762e-06,
"loss": 0.0,
"step": 508
},
{
"epoch": 0.5975395430579965,
"grad_norm": 0.021330738440155983,
"learning_rate": 2.021151586368978e-06,
"loss": 0.0002,
"step": 510
},
{
"epoch": 0.5998828353837141,
"grad_norm": 0.020080704241991043,
"learning_rate": 2.009400705052879e-06,
"loss": 0.0001,
"step": 512
},
{
"epoch": 0.6022261277094317,
"grad_norm": 0.020522406324744225,
"learning_rate": 1.9976498237367803e-06,
"loss": 0.0002,
"step": 514
},
{
"epoch": 0.6045694200351494,
"grad_norm": 0.0004171329492237419,
"learning_rate": 1.985898942420682e-06,
"loss": 0.0,
"step": 516
},
{
"epoch": 0.606912712360867,
"grad_norm": 0.0027696220204234123,
"learning_rate": 1.9741480611045828e-06,
"loss": 0.0,
"step": 518
},
{
"epoch": 0.6092560046865847,
"grad_norm": 0.021467505022883415,
"learning_rate": 1.9623971797884844e-06,
"loss": 0.0002,
"step": 520
},
{
"epoch": 0.6115992970123023,
"grad_norm": 0.011968536302447319,
"learning_rate": 1.9506462984723856e-06,
"loss": 0.0001,
"step": 522
},
{
"epoch": 0.6139425893380199,
"grad_norm": 0.0011503971181809902,
"learning_rate": 1.938895417156287e-06,
"loss": 0.0004,
"step": 524
},
{
"epoch": 0.6162858816637375,
"grad_norm": 0.02280554361641407,
"learning_rate": 1.927144535840188e-06,
"loss": 0.0002,
"step": 526
},
{
"epoch": 0.6186291739894552,
"grad_norm": 0.008415359072387218,
"learning_rate": 1.9153936545240893e-06,
"loss": 0.0001,
"step": 528
},
{
"epoch": 0.6209724663151728,
"grad_norm": 0.0024012764915823936,
"learning_rate": 1.9036427732079908e-06,
"loss": 0.0001,
"step": 530
},
{
"epoch": 0.6233157586408905,
"grad_norm": 0.010776808485388756,
"learning_rate": 1.8918918918918922e-06,
"loss": 0.0001,
"step": 532
},
{
"epoch": 0.6256590509666081,
"grad_norm": 0.017337538301944733,
"learning_rate": 1.8801410105757934e-06,
"loss": 0.0001,
"step": 534
},
{
"epoch": 0.6280023432923257,
"grad_norm": 0.0019926901441067457,
"learning_rate": 1.8683901292596946e-06,
"loss": 0.0001,
"step": 536
},
{
"epoch": 0.6303456356180434,
"grad_norm": 0.013480707071721554,
"learning_rate": 1.856639247943596e-06,
"loss": 0.0002,
"step": 538
},
{
"epoch": 0.632688927943761,
"grad_norm": 0.005608106963336468,
"learning_rate": 1.8448883666274973e-06,
"loss": 0.0002,
"step": 540
},
{
"epoch": 0.6350322202694786,
"grad_norm": 0.002639380283653736,
"learning_rate": 1.8331374853113983e-06,
"loss": 0.0001,
"step": 542
},
{
"epoch": 0.6373755125951962,
"grad_norm": 0.0022652854677289724,
"learning_rate": 1.8213866039953e-06,
"loss": 0.0002,
"step": 544
},
{
"epoch": 0.6397188049209139,
"grad_norm": 0.003624632954597473,
"learning_rate": 1.809635722679201e-06,
"loss": 0.0001,
"step": 546
},
{
"epoch": 0.6420620972466315,
"grad_norm": 0.007647163700312376,
"learning_rate": 1.7978848413631022e-06,
"loss": 0.0004,
"step": 548
},
{
"epoch": 0.6444053895723492,
"grad_norm": 0.012163680978119373,
"learning_rate": 1.7861339600470036e-06,
"loss": 0.0002,
"step": 550
},
{
"epoch": 0.6467486818980668,
"grad_norm": 0.09023822844028473,
"learning_rate": 1.7743830787309049e-06,
"loss": 0.0009,
"step": 552
},
{
"epoch": 0.6490919742237844,
"grad_norm": 0.006924999412149191,
"learning_rate": 1.762632197414806e-06,
"loss": 0.0001,
"step": 554
},
{
"epoch": 0.651435266549502,
"grad_norm": 0.0006185275269672275,
"learning_rate": 1.7508813160987075e-06,
"loss": 0.0001,
"step": 556
},
{
"epoch": 0.6537785588752196,
"grad_norm": 0.011605402454733849,
"learning_rate": 1.7391304347826088e-06,
"loss": 0.0006,
"step": 558
},
{
"epoch": 0.6561218512009374,
"grad_norm": 0.024394473060965538,
"learning_rate": 1.7273795534665102e-06,
"loss": 0.0001,
"step": 560
},
{
"epoch": 0.658465143526655,
"grad_norm": 0.023466341197490692,
"learning_rate": 1.7156286721504114e-06,
"loss": 0.0002,
"step": 562
},
{
"epoch": 0.6608084358523726,
"grad_norm": 0.010153519921004772,
"learning_rate": 1.7038777908343126e-06,
"loss": 0.0004,
"step": 564
},
{
"epoch": 0.6631517281780902,
"grad_norm": 0.43800845742225647,
"learning_rate": 1.692126909518214e-06,
"loss": 0.0012,
"step": 566
},
{
"epoch": 0.6654950205038078,
"grad_norm": 0.008404972031712532,
"learning_rate": 1.6803760282021153e-06,
"loss": 0.0001,
"step": 568
},
{
"epoch": 0.6678383128295254,
"grad_norm": 0.10615257918834686,
"learning_rate": 1.6686251468860165e-06,
"loss": 0.0005,
"step": 570
},
{
"epoch": 0.6701816051552432,
"grad_norm": 0.019307592883706093,
"learning_rate": 1.656874265569918e-06,
"loss": 0.0003,
"step": 572
},
{
"epoch": 0.6725248974809608,
"grad_norm": 0.012227280996739864,
"learning_rate": 1.6451233842538192e-06,
"loss": 0.0002,
"step": 574
},
{
"epoch": 0.6748681898066784,
"grad_norm": 0.002821948379278183,
"learning_rate": 1.6333725029377204e-06,
"loss": 0.0,
"step": 576
},
{
"epoch": 0.677211482132396,
"grad_norm": 0.010473825968801975,
"learning_rate": 1.6216216216216219e-06,
"loss": 0.0003,
"step": 578
},
{
"epoch": 0.6795547744581136,
"grad_norm": 0.014046385884284973,
"learning_rate": 1.609870740305523e-06,
"loss": 0.0236,
"step": 580
},
{
"epoch": 0.6818980667838312,
"grad_norm": 0.0017795696621760726,
"learning_rate": 1.5981198589894243e-06,
"loss": 0.0001,
"step": 582
},
{
"epoch": 0.6842413591095489,
"grad_norm": 0.0006959863239899278,
"learning_rate": 1.5863689776733257e-06,
"loss": 0.0002,
"step": 584
},
{
"epoch": 0.6865846514352666,
"grad_norm": 0.019652947783470154,
"learning_rate": 1.574618096357227e-06,
"loss": 0.0003,
"step": 586
},
{
"epoch": 0.6889279437609842,
"grad_norm": 0.002340570092201233,
"learning_rate": 1.5628672150411282e-06,
"loss": 0.0,
"step": 588
},
{
"epoch": 0.6912712360867018,
"grad_norm": 0.011190817691385746,
"learning_rate": 1.5511163337250296e-06,
"loss": 0.0002,
"step": 590
},
{
"epoch": 0.6936145284124194,
"grad_norm": 0.001152676297351718,
"learning_rate": 1.5393654524089308e-06,
"loss": 0.0001,
"step": 592
},
{
"epoch": 0.6959578207381371,
"grad_norm": 0.003393592080101371,
"learning_rate": 1.5276145710928319e-06,
"loss": 0.0001,
"step": 594
},
{
"epoch": 0.6983011130638547,
"grad_norm": 0.007921353913843632,
"learning_rate": 1.5158636897767335e-06,
"loss": 0.0001,
"step": 596
},
{
"epoch": 0.7006444053895724,
"grad_norm": 0.1039208471775055,
"learning_rate": 1.5041128084606345e-06,
"loss": 0.0002,
"step": 598
},
{
"epoch": 0.70298769771529,
"grad_norm": 0.0011576958931982517,
"learning_rate": 1.4923619271445362e-06,
"loss": 0.0001,
"step": 600
},
{
"epoch": 0.7053309900410076,
"grad_norm": 0.06407307088375092,
"learning_rate": 1.4806110458284372e-06,
"loss": 0.0003,
"step": 602
},
{
"epoch": 0.7076742823667252,
"grad_norm": 0.012639104388654232,
"learning_rate": 1.4688601645123384e-06,
"loss": 0.0002,
"step": 604
},
{
"epoch": 0.7100175746924429,
"grad_norm": 0.0019591290038079023,
"learning_rate": 1.45710928319624e-06,
"loss": 0.0068,
"step": 606
},
{
"epoch": 0.7123608670181605,
"grad_norm": 0.0008327167597599328,
"learning_rate": 1.445358401880141e-06,
"loss": 0.0001,
"step": 608
},
{
"epoch": 0.7147041593438781,
"grad_norm": 0.0013139324728399515,
"learning_rate": 1.4336075205640423e-06,
"loss": 0.0,
"step": 610
},
{
"epoch": 0.7170474516695958,
"grad_norm": 0.00803992711007595,
"learning_rate": 1.4218566392479437e-06,
"loss": 0.0002,
"step": 612
},
{
"epoch": 0.7193907439953134,
"grad_norm": 0.011399227194488049,
"learning_rate": 1.410105757931845e-06,
"loss": 0.0002,
"step": 614
},
{
"epoch": 0.7217340363210311,
"grad_norm": 0.007171169854700565,
"learning_rate": 1.3983548766157462e-06,
"loss": 0.0002,
"step": 616
},
{
"epoch": 0.7240773286467487,
"grad_norm": 0.7272996306419373,
"learning_rate": 1.3866039952996476e-06,
"loss": 0.0028,
"step": 618
},
{
"epoch": 0.7264206209724663,
"grad_norm": 0.0037387118209153414,
"learning_rate": 1.3748531139835488e-06,
"loss": 0.0001,
"step": 620
},
{
"epoch": 0.7287639132981839,
"grad_norm": 0.015048849396407604,
"learning_rate": 1.36310223266745e-06,
"loss": 0.0002,
"step": 622
},
{
"epoch": 0.7311072056239016,
"grad_norm": 0.0023705060593783855,
"learning_rate": 1.3513513513513515e-06,
"loss": 0.0001,
"step": 624
},
{
"epoch": 0.7334504979496193,
"grad_norm": 0.03966263309121132,
"learning_rate": 1.3396004700352527e-06,
"loss": 0.0003,
"step": 626
},
{
"epoch": 0.7357937902753369,
"grad_norm": 0.0033043306320905685,
"learning_rate": 1.327849588719154e-06,
"loss": 0.0004,
"step": 628
},
{
"epoch": 0.7381370826010545,
"grad_norm": 0.35459718108177185,
"learning_rate": 1.3160987074030554e-06,
"loss": 0.0034,
"step": 630
},
{
"epoch": 0.7404803749267721,
"grad_norm": 0.016441915184259415,
"learning_rate": 1.3043478260869566e-06,
"loss": 0.0002,
"step": 632
},
{
"epoch": 0.7428236672524897,
"grad_norm": 0.0045352657325565815,
"learning_rate": 1.2925969447708578e-06,
"loss": 0.0002,
"step": 634
},
{
"epoch": 0.7451669595782073,
"grad_norm": 0.06311573088169098,
"learning_rate": 1.2808460634547593e-06,
"loss": 0.0005,
"step": 636
},
{
"epoch": 0.7475102519039251,
"grad_norm": 0.11154340207576752,
"learning_rate": 1.2690951821386605e-06,
"loss": 0.0009,
"step": 638
},
{
"epoch": 0.7498535442296427,
"grad_norm": 0.01816423609852791,
"learning_rate": 1.257344300822562e-06,
"loss": 0.0006,
"step": 640
},
{
"epoch": 0.7521968365553603,
"grad_norm": 0.027273530140519142,
"learning_rate": 1.2455934195064632e-06,
"loss": 0.0005,
"step": 642
},
{
"epoch": 0.7545401288810779,
"grad_norm": 0.006555743515491486,
"learning_rate": 1.2338425381903644e-06,
"loss": 0.0003,
"step": 644
},
{
"epoch": 0.7568834212067955,
"grad_norm": 0.0030812753830105066,
"learning_rate": 1.2220916568742656e-06,
"loss": 0.0279,
"step": 646
},
{
"epoch": 0.7592267135325131,
"grad_norm": 0.01702543906867504,
"learning_rate": 1.210340775558167e-06,
"loss": 0.0001,
"step": 648
},
{
"epoch": 0.7615700058582309,
"grad_norm": 0.02607725001871586,
"learning_rate": 1.1985898942420683e-06,
"loss": 0.0001,
"step": 650
},
{
"epoch": 0.7639132981839485,
"grad_norm": 0.006388965994119644,
"learning_rate": 1.1868390129259695e-06,
"loss": 0.0001,
"step": 652
},
{
"epoch": 0.7662565905096661,
"grad_norm": 0.008253968320786953,
"learning_rate": 1.175088131609871e-06,
"loss": 0.0001,
"step": 654
},
{
"epoch": 0.7685998828353837,
"grad_norm": 0.004699599463492632,
"learning_rate": 1.1633372502937722e-06,
"loss": 0.0002,
"step": 656
},
{
"epoch": 0.7709431751611013,
"grad_norm": 0.0012458263663575053,
"learning_rate": 1.1515863689776734e-06,
"loss": 0.0122,
"step": 658
},
{
"epoch": 0.773286467486819,
"grad_norm": 0.02383268252015114,
"learning_rate": 1.1398354876615746e-06,
"loss": 0.0003,
"step": 660
},
{
"epoch": 0.7756297598125366,
"grad_norm": 0.015058089047670364,
"learning_rate": 1.128084606345476e-06,
"loss": 0.0001,
"step": 662
},
{
"epoch": 0.7779730521382543,
"grad_norm": 0.01569475792348385,
"learning_rate": 1.1163337250293773e-06,
"loss": 0.0003,
"step": 664
},
{
"epoch": 0.7803163444639719,
"grad_norm": 0.04253750294446945,
"learning_rate": 1.1045828437132785e-06,
"loss": 0.0002,
"step": 666
},
{
"epoch": 0.7826596367896895,
"grad_norm": 0.015156907960772514,
"learning_rate": 1.09283196239718e-06,
"loss": 0.0002,
"step": 668
},
{
"epoch": 0.7850029291154071,
"grad_norm": 0.03742622211575508,
"learning_rate": 1.0810810810810812e-06,
"loss": 0.0005,
"step": 670
},
{
"epoch": 0.7873462214411248,
"grad_norm": 0.027262985706329346,
"learning_rate": 1.0693301997649824e-06,
"loss": 0.0002,
"step": 672
},
{
"epoch": 0.7896895137668424,
"grad_norm": 0.007641313597559929,
"learning_rate": 1.0575793184488838e-06,
"loss": 0.0002,
"step": 674
},
{
"epoch": 0.79203280609256,
"grad_norm": 0.04441560059785843,
"learning_rate": 1.045828437132785e-06,
"loss": 0.0005,
"step": 676
},
{
"epoch": 0.7943760984182777,
"grad_norm": 0.020478103309869766,
"learning_rate": 1.0340775558166863e-06,
"loss": 0.0002,
"step": 678
},
{
"epoch": 0.7967193907439953,
"grad_norm": 0.10936477035284042,
"learning_rate": 1.0223266745005877e-06,
"loss": 0.001,
"step": 680
},
{
"epoch": 0.799062683069713,
"grad_norm": 0.01284460723400116,
"learning_rate": 1.010575793184489e-06,
"loss": 0.0015,
"step": 682
},
{
"epoch": 0.8014059753954306,
"grad_norm": 0.003440434578806162,
"learning_rate": 9.988249118683902e-07,
"loss": 0.0,
"step": 684
},
{
"epoch": 0.8037492677211482,
"grad_norm": 0.013081365264952183,
"learning_rate": 9.870740305522914e-07,
"loss": 0.0009,
"step": 686
},
{
"epoch": 0.8060925600468658,
"grad_norm": 0.013380183838307858,
"learning_rate": 9.753231492361928e-07,
"loss": 0.0002,
"step": 688
},
{
"epoch": 0.8084358523725835,
"grad_norm": 0.03771582618355751,
"learning_rate": 9.63572267920094e-07,
"loss": 0.0003,
"step": 690
},
{
"epoch": 0.8107791446983011,
"grad_norm": 0.0009556732256896794,
"learning_rate": 9.518213866039954e-07,
"loss": 0.0005,
"step": 692
},
{
"epoch": 0.8131224370240188,
"grad_norm": 0.0019481348572298884,
"learning_rate": 9.400705052878967e-07,
"loss": 0.0001,
"step": 694
},
{
"epoch": 0.8154657293497364,
"grad_norm": 0.0021866948809474707,
"learning_rate": 9.28319623971798e-07,
"loss": 0.0002,
"step": 696
},
{
"epoch": 0.817809021675454,
"grad_norm": 0.007546517997980118,
"learning_rate": 9.165687426556992e-07,
"loss": 0.0007,
"step": 698
},
{
"epoch": 0.8201523140011716,
"grad_norm": 2.074432611465454,
"learning_rate": 9.048178613396005e-07,
"loss": 0.0251,
"step": 700
},
{
"epoch": 0.8224956063268892,
"grad_norm": 0.003374068532139063,
"learning_rate": 8.930669800235018e-07,
"loss": 0.0001,
"step": 702
},
{
"epoch": 0.824838898652607,
"grad_norm": 0.010109562426805496,
"learning_rate": 8.81316098707403e-07,
"loss": 0.0006,
"step": 704
},
{
"epoch": 0.8271821909783246,
"grad_norm": 0.017352379858493805,
"learning_rate": 8.695652173913044e-07,
"loss": 0.0001,
"step": 706
},
{
"epoch": 0.8295254833040422,
"grad_norm": 0.016872087493538857,
"learning_rate": 8.578143360752057e-07,
"loss": 0.0002,
"step": 708
},
{
"epoch": 0.8318687756297598,
"grad_norm": 0.041937246918678284,
"learning_rate": 8.46063454759107e-07,
"loss": 0.0228,
"step": 710
},
{
"epoch": 0.8342120679554774,
"grad_norm": 0.02908233553171158,
"learning_rate": 8.343125734430083e-07,
"loss": 0.0002,
"step": 712
},
{
"epoch": 0.836555360281195,
"grad_norm": 0.0012463816674426198,
"learning_rate": 8.225616921269096e-07,
"loss": 0.0004,
"step": 714
},
{
"epoch": 0.8388986526069128,
"grad_norm": 0.04300675913691521,
"learning_rate": 8.108108108108109e-07,
"loss": 0.0006,
"step": 716
},
{
"epoch": 0.8412419449326304,
"grad_norm": 2.7622828483581543,
"learning_rate": 7.990599294947122e-07,
"loss": 0.149,
"step": 718
},
{
"epoch": 0.843585237258348,
"grad_norm": 0.010049765929579735,
"learning_rate": 7.873090481786135e-07,
"loss": 0.0002,
"step": 720
},
{
"epoch": 0.8459285295840656,
"grad_norm": 0.011876920238137245,
"learning_rate": 7.755581668625148e-07,
"loss": 0.0001,
"step": 722
},
{
"epoch": 0.8482718219097832,
"grad_norm": 0.014826681464910507,
"learning_rate": 7.638072855464159e-07,
"loss": 0.0003,
"step": 724
},
{
"epoch": 0.8506151142355008,
"grad_norm": 0.16368882358074188,
"learning_rate": 7.520564042303173e-07,
"loss": 0.0013,
"step": 726
},
{
"epoch": 0.8529584065612185,
"grad_norm": 0.02603282406926155,
"learning_rate": 7.403055229142186e-07,
"loss": 0.0004,
"step": 728
},
{
"epoch": 0.8553016988869362,
"grad_norm": 0.7740702629089355,
"learning_rate": 7.2855464159812e-07,
"loss": 0.0043,
"step": 730
},
{
"epoch": 0.8576449912126538,
"grad_norm": 0.010226438753306866,
"learning_rate": 7.168037602820211e-07,
"loss": 0.0002,
"step": 732
},
{
"epoch": 0.8599882835383714,
"grad_norm": 0.02008165791630745,
"learning_rate": 7.050528789659225e-07,
"loss": 0.0002,
"step": 734
},
{
"epoch": 0.862331575864089,
"grad_norm": 0.09208586066961288,
"learning_rate": 6.933019976498238e-07,
"loss": 0.0008,
"step": 736
},
{
"epoch": 0.8646748681898067,
"grad_norm": 0.01933148130774498,
"learning_rate": 6.81551116333725e-07,
"loss": 0.0011,
"step": 738
},
{
"epoch": 0.8670181605155243,
"grad_norm": 0.04433580860495567,
"learning_rate": 6.698002350176264e-07,
"loss": 0.0003,
"step": 740
},
{
"epoch": 0.869361452841242,
"grad_norm": 0.01631711982190609,
"learning_rate": 6.580493537015277e-07,
"loss": 0.0003,
"step": 742
},
{
"epoch": 0.8717047451669596,
"grad_norm": 0.042307399213314056,
"learning_rate": 6.462984723854289e-07,
"loss": 0.0004,
"step": 744
},
{
"epoch": 0.8740480374926772,
"grad_norm": 0.22414757311344147,
"learning_rate": 6.345475910693303e-07,
"loss": 0.0018,
"step": 746
},
{
"epoch": 0.8763913298183948,
"grad_norm": 0.17513447999954224,
"learning_rate": 6.227967097532316e-07,
"loss": 0.0015,
"step": 748
},
{
"epoch": 0.8787346221441125,
"grad_norm": 0.3218580186367035,
"learning_rate": 6.110458284371328e-07,
"loss": 0.0029,
"step": 750
},
{
"epoch": 0.8810779144698301,
"grad_norm": 0.026706017553806305,
"learning_rate": 5.992949471210341e-07,
"loss": 0.0004,
"step": 752
},
{
"epoch": 0.8834212067955477,
"grad_norm": 0.4114263951778412,
"learning_rate": 5.875440658049355e-07,
"loss": 0.0035,
"step": 754
},
{
"epoch": 0.8857644991212654,
"grad_norm": 0.25009235739707947,
"learning_rate": 5.757931844888367e-07,
"loss": 0.0016,
"step": 756
},
{
"epoch": 0.888107791446983,
"grad_norm": 1.2960833311080933,
"learning_rate": 5.64042303172738e-07,
"loss": 0.0059,
"step": 758
},
{
"epoch": 0.8904510837727007,
"grad_norm": 0.28417083621025085,
"learning_rate": 5.522914218566393e-07,
"loss": 0.0059,
"step": 760
},
{
"epoch": 0.8927943760984183,
"grad_norm": 0.2292051613330841,
"learning_rate": 5.405405405405406e-07,
"loss": 0.0015,
"step": 762
},
{
"epoch": 0.8951376684241359,
"grad_norm": 0.012189504690468311,
"learning_rate": 5.287896592244419e-07,
"loss": 0.0007,
"step": 764
},
{
"epoch": 0.8974809607498535,
"grad_norm": 0.09458251297473907,
"learning_rate": 5.170387779083431e-07,
"loss": 0.0004,
"step": 766
},
{
"epoch": 0.8998242530755711,
"grad_norm": 0.027070222422480583,
"learning_rate": 5.052878965922445e-07,
"loss": 0.0012,
"step": 768
},
{
"epoch": 0.9021675454012889,
"grad_norm": 0.047401878982782364,
"learning_rate": 4.935370152761457e-07,
"loss": 0.0003,
"step": 770
},
{
"epoch": 0.9045108377270065,
"grad_norm": 0.06239737570285797,
"learning_rate": 4.81786133960047e-07,
"loss": 0.0012,
"step": 772
},
{
"epoch": 0.9068541300527241,
"grad_norm": 2.6842846870422363,
"learning_rate": 4.7003525264394836e-07,
"loss": 0.1103,
"step": 774
},
{
"epoch": 0.9091974223784417,
"grad_norm": 0.057395774871110916,
"learning_rate": 4.582843713278496e-07,
"loss": 0.0004,
"step": 776
},
{
"epoch": 0.9115407147041593,
"grad_norm": 0.16248440742492676,
"learning_rate": 4.465334900117509e-07,
"loss": 0.0018,
"step": 778
},
{
"epoch": 0.9138840070298769,
"grad_norm": 0.11067284643650055,
"learning_rate": 4.347826086956522e-07,
"loss": 0.0011,
"step": 780
},
{
"epoch": 0.9162272993555947,
"grad_norm": 0.07208680361509323,
"learning_rate": 4.230317273795535e-07,
"loss": 0.0011,
"step": 782
},
{
"epoch": 0.9185705916813123,
"grad_norm": 0.4830150604248047,
"learning_rate": 4.112808460634548e-07,
"loss": 0.0022,
"step": 784
},
{
"epoch": 0.9209138840070299,
"grad_norm": 0.01794450171291828,
"learning_rate": 3.995299647473561e-07,
"loss": 0.0011,
"step": 786
},
{
"epoch": 0.9232571763327475,
"grad_norm": 3.0485081672668457,
"learning_rate": 3.877790834312574e-07,
"loss": 0.0508,
"step": 788
},
{
"epoch": 0.9256004686584651,
"grad_norm": 3.130112648010254,
"learning_rate": 3.7602820211515863e-07,
"loss": 0.0194,
"step": 790
},
{
"epoch": 0.9279437609841827,
"grad_norm": 3.5992815494537354,
"learning_rate": 3.6427732079906e-07,
"loss": 0.1036,
"step": 792
},
{
"epoch": 0.9302870533099004,
"grad_norm": 0.0751647800207138,
"learning_rate": 3.5252643948296124e-07,
"loss": 0.0003,
"step": 794
},
{
"epoch": 0.9326303456356181,
"grad_norm": 0.03622612729668617,
"learning_rate": 3.407755581668625e-07,
"loss": 0.0011,
"step": 796
},
{
"epoch": 0.9349736379613357,
"grad_norm": 0.22365981340408325,
"learning_rate": 3.2902467685076385e-07,
"loss": 0.0028,
"step": 798
},
{
"epoch": 0.9373169302870533,
"grad_norm": 0.04666091129183769,
"learning_rate": 3.172737955346651e-07,
"loss": 0.0041,
"step": 800
},
{
"epoch": 0.9396602226127709,
"grad_norm": 5.363467693328857,
"learning_rate": 3.055229142185664e-07,
"loss": 0.2217,
"step": 802
},
{
"epoch": 0.9420035149384886,
"grad_norm": 0.06753694266080856,
"learning_rate": 2.9377203290246774e-07,
"loss": 0.0026,
"step": 804
},
{
"epoch": 0.9443468072642062,
"grad_norm": 2.554419994354248,
"learning_rate": 2.82021151586369e-07,
"loss": 0.0791,
"step": 806
},
{
"epoch": 0.9466900995899239,
"grad_norm": 0.14563411474227905,
"learning_rate": 2.702702702702703e-07,
"loss": 0.0208,
"step": 808
},
{
"epoch": 0.9490333919156415,
"grad_norm": 2.30971360206604,
"learning_rate": 2.5851938895417157e-07,
"loss": 0.1119,
"step": 810
},
{
"epoch": 0.9513766842413591,
"grad_norm": 4.073694229125977,
"learning_rate": 2.4676850763807285e-07,
"loss": 0.1057,
"step": 812
},
{
"epoch": 0.9537199765670767,
"grad_norm": 2.3215789794921875,
"learning_rate": 2.3501762632197418e-07,
"loss": 0.0286,
"step": 814
},
{
"epoch": 0.9560632688927944,
"grad_norm": 0.46727773547172546,
"learning_rate": 2.2326674500587546e-07,
"loss": 0.0714,
"step": 816
},
{
"epoch": 0.958406561218512,
"grad_norm": 2.0026137828826904,
"learning_rate": 2.1151586368977676e-07,
"loss": 0.0455,
"step": 818
},
{
"epoch": 0.9607498535442296,
"grad_norm": 3.2537143230438232,
"learning_rate": 1.9976498237367804e-07,
"loss": 0.0765,
"step": 820
},
{
"epoch": 0.9630931458699473,
"grad_norm": 3.485633134841919,
"learning_rate": 1.8801410105757932e-07,
"loss": 0.0493,
"step": 822
},
{
"epoch": 0.9654364381956649,
"grad_norm": 2.769423246383667,
"learning_rate": 1.7626321974148062e-07,
"loss": 0.0602,
"step": 824
},
{
"epoch": 0.9677797305213826,
"grad_norm": 2.236210823059082,
"learning_rate": 1.6451233842538192e-07,
"loss": 0.1404,
"step": 826
},
{
"epoch": 0.9701230228471002,
"grad_norm": 0.06197360157966614,
"learning_rate": 1.527614571092832e-07,
"loss": 0.0472,
"step": 828
},
{
"epoch": 0.9724663151728178,
"grad_norm": 0.8206185698509216,
"learning_rate": 1.410105757931845e-07,
"loss": 0.0686,
"step": 830
},
{
"epoch": 0.9748096074985354,
"grad_norm": 2.434030771255493,
"learning_rate": 1.2925969447708578e-07,
"loss": 0.1322,
"step": 832
},
{
"epoch": 0.9771528998242531,
"grad_norm": 0.03143630549311638,
"learning_rate": 1.1750881316098709e-07,
"loss": 0.1134,
"step": 834
},
{
"epoch": 0.9794961921499707,
"grad_norm": 0.1770186424255371,
"learning_rate": 1.0575793184488838e-07,
"loss": 0.0011,
"step": 836
},
{
"epoch": 0.9818394844756884,
"grad_norm": 6.03350830078125,
"learning_rate": 9.400705052878966e-08,
"loss": 0.4193,
"step": 838
},
{
"epoch": 0.984182776801406,
"grad_norm": 4.842612266540527,
"learning_rate": 8.225616921269096e-08,
"loss": 0.0951,
"step": 840
},
{
"epoch": 0.9865260691271236,
"grad_norm": 3.111945629119873,
"learning_rate": 7.050528789659225e-08,
"loss": 0.1375,
"step": 842
},
{
"epoch": 0.9888693614528412,
"grad_norm": 3.4468753337860107,
"learning_rate": 5.8754406580493544e-08,
"loss": 0.157,
"step": 844
},
{
"epoch": 0.9912126537785588,
"grad_norm": 5.563467502593994,
"learning_rate": 4.700352526439483e-08,
"loss": 0.1989,
"step": 846
},
{
"epoch": 0.9935559461042766,
"grad_norm": 0.20900146663188934,
"learning_rate": 3.5252643948296127e-08,
"loss": 0.169,
"step": 848
},
{
"epoch": 0.9958992384299942,
"grad_norm": 2.651283025741577,
"learning_rate": 2.3501762632197414e-08,
"loss": 0.0203,
"step": 850
},
{
"epoch": 0.9982425307557118,
"grad_norm": 3.192451000213623,
"learning_rate": 1.1750881316098707e-08,
"loss": 0.0786,
"step": 852
}
],
"logging_steps": 2,
"max_steps": 853,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 20000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}