myownskyW7 commited on
Commit
73bac1d
1 Parent(s): 834945f

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -1045
trainer_state.json DELETED
@@ -1,1045 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.997545409916544,
5
- "eval_steps": 500,
6
- "global_step": 127,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.01,
13
- "learning_rate": "0.0000e+00",
14
- "loss": 3.1086,
15
- "slid_loss": 3.1086,
16
- "step": 1,
17
- "time": 93.96
18
- },
19
- {
20
- "epoch": 0.02,
21
- "learning_rate": "5.0000e-06",
22
- "loss": 2.8339,
23
- "slid_loss": 2.9713,
24
- "step": 2,
25
- "time": 84.7
26
- },
27
- {
28
- "epoch": 0.02,
29
- "learning_rate": "5.0000e-06",
30
- "loss": 2.8656,
31
- "slid_loss": 2.936,
32
- "step": 3,
33
- "time": 79.97
34
- },
35
- {
36
- "epoch": 0.03,
37
- "learning_rate": "5.0000e-06",
38
- "loss": 2.5524,
39
- "slid_loss": 2.8401,
40
- "step": 4,
41
- "time": 76.7
42
- },
43
- {
44
- "epoch": 0.04,
45
- "learning_rate": "5.0000e-06",
46
- "loss": 2.5858,
47
- "slid_loss": 2.7892,
48
- "step": 5,
49
- "time": 81.12
50
- },
51
- {
52
- "epoch": 0.05,
53
- "learning_rate": "5.0000e-06",
54
- "loss": 2.0771,
55
- "slid_loss": 2.6706,
56
- "step": 6,
57
- "time": 78.35
58
- },
59
- {
60
- "epoch": 0.05,
61
- "learning_rate": "5.0000e-06",
62
- "loss": 2.0975,
63
- "slid_loss": 2.5887,
64
- "step": 7,
65
- "time": 81.78
66
- },
67
- {
68
- "epoch": 0.06,
69
- "learning_rate": "5.0000e-06",
70
- "loss": 1.9181,
71
- "slid_loss": 2.5049,
72
- "step": 8,
73
- "time": 81.35
74
- },
75
- {
76
- "epoch": 0.07,
77
- "learning_rate": "5.0000e-06",
78
- "loss": 1.9339,
79
- "slid_loss": 2.4414,
80
- "step": 9,
81
- "time": 83.78
82
- },
83
- {
84
- "epoch": 0.08,
85
- "learning_rate": "5.0000e-06",
86
- "loss": 1.8134,
87
- "slid_loss": 2.3786,
88
- "step": 10,
89
- "time": 79.06
90
- },
91
- {
92
- "epoch": 0.09,
93
- "learning_rate": "5.0000e-06",
94
- "loss": 1.8346,
95
- "slid_loss": 2.3292,
96
- "step": 11,
97
- "time": 80.19
98
- },
99
- {
100
- "epoch": 0.09,
101
- "learning_rate": "5.0000e-06",
102
- "loss": 1.801,
103
- "slid_loss": 2.2851,
104
- "step": 12,
105
- "time": 87.7
106
- },
107
- {
108
- "epoch": 0.1,
109
- "learning_rate": "5.0000e-06",
110
- "loss": 1.8712,
111
- "slid_loss": 2.2533,
112
- "step": 13,
113
- "time": 81.42
114
- },
115
- {
116
- "epoch": 0.11,
117
- "learning_rate": "5.0000e-06",
118
- "loss": 1.8017,
119
- "slid_loss": 2.221,
120
- "step": 14,
121
- "time": 81.39
122
- },
123
- {
124
- "epoch": 0.12,
125
- "learning_rate": "5.0000e-06",
126
- "loss": 1.7378,
127
- "slid_loss": 2.1888,
128
- "step": 15,
129
- "time": 80.29
130
- },
131
- {
132
- "epoch": 0.13,
133
- "learning_rate": "5.0000e-06",
134
- "loss": 1.9118,
135
- "slid_loss": 2.1715,
136
- "step": 16,
137
- "time": 83.04
138
- },
139
- {
140
- "epoch": 0.13,
141
- "learning_rate": "5.0000e-06",
142
- "loss": 1.7942,
143
- "slid_loss": 2.1493,
144
- "step": 17,
145
- "time": 75.98
146
- },
147
- {
148
- "epoch": 0.14,
149
- "learning_rate": "5.0000e-06",
150
- "loss": 1.7516,
151
- "slid_loss": 2.1272,
152
- "step": 18,
153
- "time": 88.91
154
- },
155
- {
156
- "epoch": 0.15,
157
- "learning_rate": "5.0000e-06",
158
- "loss": 1.8129,
159
- "slid_loss": 2.1107,
160
- "step": 19,
161
- "time": 80.76
162
- },
163
- {
164
- "epoch": 0.16,
165
- "learning_rate": "5.0000e-06",
166
- "loss": 1.7673,
167
- "slid_loss": 2.0935,
168
- "step": 20,
169
- "time": 81.28
170
- },
171
- {
172
- "epoch": 0.16,
173
- "learning_rate": "5.0000e-06",
174
- "loss": 1.7984,
175
- "slid_loss": 2.0795,
176
- "step": 21,
177
- "time": 85.37
178
- },
179
- {
180
- "epoch": 0.17,
181
- "learning_rate": "5.0000e-06",
182
- "loss": 1.7733,
183
- "slid_loss": 2.0655,
184
- "step": 22,
185
- "time": 80.72
186
- },
187
- {
188
- "epoch": 0.18,
189
- "learning_rate": "5.0000e-06",
190
- "loss": 1.7691,
191
- "slid_loss": 2.0527,
192
- "step": 23,
193
- "time": 80.11
194
- },
195
- {
196
- "epoch": 0.19,
197
- "learning_rate": "5.0000e-06",
198
- "loss": 1.8212,
199
- "slid_loss": 2.043,
200
- "step": 24,
201
- "time": 81.83
202
- },
203
- {
204
- "epoch": 0.2,
205
- "learning_rate": "5.0000e-06",
206
- "loss": 1.9154,
207
- "slid_loss": 2.0379,
208
- "step": 25,
209
- "time": 78.48
210
- },
211
- {
212
- "epoch": 0.2,
213
- "learning_rate": "5.0000e-06",
214
- "loss": 1.8267,
215
- "slid_loss": 2.0298,
216
- "step": 26,
217
- "time": 80.38
218
- },
219
- {
220
- "epoch": 0.21,
221
- "learning_rate": "5.0000e-06",
222
- "loss": 1.9029,
223
- "slid_loss": 2.0251,
224
- "step": 27,
225
- "time": 84.88
226
- },
227
- {
228
- "epoch": 0.22,
229
- "learning_rate": "5.0000e-06",
230
- "loss": 1.8892,
231
- "slid_loss": 2.0202,
232
- "step": 28,
233
- "time": 81.13
234
- },
235
- {
236
- "epoch": 0.23,
237
- "learning_rate": "5.0000e-06",
238
- "loss": 1.7599,
239
- "slid_loss": 2.0112,
240
- "step": 29,
241
- "time": 83.73
242
- },
243
- {
244
- "epoch": 0.24,
245
- "learning_rate": "5.0000e-06",
246
- "loss": 1.7919,
247
- "slid_loss": 2.0039,
248
- "step": 30,
249
- "time": 82.44
250
- },
251
- {
252
- "epoch": 0.24,
253
- "learning_rate": "5.0000e-06",
254
- "loss": 1.8086,
255
- "slid_loss": 1.9976,
256
- "step": 31,
257
- "time": 83.46
258
- },
259
- {
260
- "epoch": 0.25,
261
- "learning_rate": "5.0000e-06",
262
- "loss": 1.8349,
263
- "slid_loss": 1.9925,
264
- "step": 32,
265
- "time": 78.89
266
- },
267
- {
268
- "epoch": 0.26,
269
- "learning_rate": "5.0000e-06",
270
- "loss": 1.8263,
271
- "slid_loss": 1.9875,
272
- "step": 33,
273
- "time": 79.08
274
- },
275
- {
276
- "epoch": 0.27,
277
- "learning_rate": "5.0000e-06",
278
- "loss": 1.9287,
279
- "slid_loss": 1.9858,
280
- "step": 34,
281
- "time": 81.76
282
- },
283
- {
284
- "epoch": 0.27,
285
- "learning_rate": "5.0000e-06",
286
- "loss": 1.785,
287
- "slid_loss": 1.98,
288
- "step": 35,
289
- "time": 78.4
290
- },
291
- {
292
- "epoch": 0.28,
293
- "learning_rate": "5.0000e-06",
294
- "loss": 1.8191,
295
- "slid_loss": 1.9756,
296
- "step": 36,
297
- "time": 77.74
298
- },
299
- {
300
- "epoch": 0.29,
301
- "learning_rate": "5.0000e-06",
302
- "loss": 1.8219,
303
- "slid_loss": 1.9714,
304
- "step": 37,
305
- "time": 86.24
306
- },
307
- {
308
- "epoch": 0.3,
309
- "learning_rate": "5.0000e-06",
310
- "loss": 1.8075,
311
- "slid_loss": 1.9671,
312
- "step": 38,
313
- "time": 76.73
314
- },
315
- {
316
- "epoch": 0.31,
317
- "learning_rate": "5.0000e-06",
318
- "loss": 1.7785,
319
- "slid_loss": 1.9623,
320
- "step": 39,
321
- "time": 80.96
322
- },
323
- {
324
- "epoch": 0.31,
325
- "learning_rate": "5.0000e-06",
326
- "loss": 1.8296,
327
- "slid_loss": 1.959,
328
- "step": 40,
329
- "time": 83.93
330
- },
331
- {
332
- "epoch": 0.32,
333
- "learning_rate": "5.0000e-06",
334
- "loss": 1.7834,
335
- "slid_loss": 1.9547,
336
- "step": 41,
337
- "time": 77.98
338
- },
339
- {
340
- "epoch": 0.33,
341
- "learning_rate": "5.0000e-06",
342
- "loss": 1.7894,
343
- "slid_loss": 1.9507,
344
- "step": 42,
345
- "time": 82.3
346
- },
347
- {
348
- "epoch": 0.34,
349
- "learning_rate": "5.0000e-06",
350
- "loss": 1.8013,
351
- "slid_loss": 1.9473,
352
- "step": 43,
353
- "time": 84.45
354
- },
355
- {
356
- "epoch": 0.35,
357
- "learning_rate": "5.0000e-06",
358
- "loss": 1.7882,
359
- "slid_loss": 1.9436,
360
- "step": 44,
361
- "time": 78.67
362
- },
363
- {
364
- "epoch": 0.35,
365
- "learning_rate": "5.0000e-06",
366
- "loss": 1.7633,
367
- "slid_loss": 1.9396,
368
- "step": 45,
369
- "time": 79.58
370
- },
371
- {
372
- "epoch": 0.36,
373
- "learning_rate": "5.0000e-06",
374
- "loss": 1.8443,
375
- "slid_loss": 1.9376,
376
- "step": 46,
377
- "time": 79.13
378
- },
379
- {
380
- "epoch": 0.37,
381
- "learning_rate": "5.0000e-06",
382
- "loss": 1.7184,
383
- "slid_loss": 1.9329,
384
- "step": 47,
385
- "time": 78.73
386
- },
387
- {
388
- "epoch": 0.38,
389
- "learning_rate": "5.0000e-06",
390
- "loss": 1.7869,
391
- "slid_loss": 1.9299,
392
- "step": 48,
393
- "time": 77.96
394
- },
395
- {
396
- "epoch": 0.38,
397
- "learning_rate": "5.0000e-06",
398
- "loss": 1.8581,
399
- "slid_loss": 1.9284,
400
- "step": 49,
401
- "time": 84.5
402
- },
403
- {
404
- "epoch": 0.39,
405
- "learning_rate": "5.0000e-06",
406
- "loss": 1.7501,
407
- "slid_loss": 1.9248,
408
- "step": 50,
409
- "time": 79.17
410
- },
411
- {
412
- "epoch": 0.4,
413
- "learning_rate": "5.0000e-06",
414
- "loss": 1.7825,
415
- "slid_loss": 1.922,
416
- "step": 51,
417
- "time": 199.7
418
- },
419
- {
420
- "epoch": 0.41,
421
- "learning_rate": "5.0000e-06",
422
- "loss": 1.863,
423
- "slid_loss": 1.9209,
424
- "step": 52,
425
- "time": 81.1
426
- },
427
- {
428
- "epoch": 0.42,
429
- "learning_rate": "5.0000e-06",
430
- "loss": 1.8452,
431
- "slid_loss": 1.9195,
432
- "step": 53,
433
- "time": 77.93
434
- },
435
- {
436
- "epoch": 0.42,
437
- "learning_rate": "5.0000e-06",
438
- "loss": 1.8031,
439
- "slid_loss": 1.9173,
440
- "step": 54,
441
- "time": 78.92
442
- },
443
- {
444
- "epoch": 0.43,
445
- "learning_rate": "5.0000e-06",
446
- "loss": 1.7708,
447
- "slid_loss": 1.9147,
448
- "step": 55,
449
- "time": 77.85
450
- },
451
- {
452
- "epoch": 0.44,
453
- "learning_rate": "5.0000e-06",
454
- "loss": 1.7764,
455
- "slid_loss": 1.9122,
456
- "step": 56,
457
- "time": 76.81
458
- },
459
- {
460
- "epoch": 0.45,
461
- "learning_rate": "5.0000e-06",
462
- "loss": 1.7854,
463
- "slid_loss": 1.91,
464
- "step": 57,
465
- "time": 80.48
466
- },
467
- {
468
- "epoch": 0.46,
469
- "learning_rate": "5.0000e-06",
470
- "loss": 1.7826,
471
- "slid_loss": 1.9078,
472
- "step": 58,
473
- "time": 77.28
474
- },
475
- {
476
- "epoch": 0.46,
477
- "learning_rate": "5.0000e-06",
478
- "loss": 1.8057,
479
- "slid_loss": 1.906,
480
- "step": 59,
481
- "time": 77.73
482
- },
483
- {
484
- "epoch": 0.47,
485
- "learning_rate": "5.0000e-06",
486
- "loss": 1.7842,
487
- "slid_loss": 1.904,
488
- "step": 60,
489
- "time": 80.25
490
- },
491
- {
492
- "epoch": 0.48,
493
- "learning_rate": "5.0000e-06",
494
- "loss": 1.8633,
495
- "slid_loss": 1.9033,
496
- "step": 61,
497
- "time": 83.66
498
- },
499
- {
500
- "epoch": 0.49,
501
- "learning_rate": "5.0000e-06",
502
- "loss": 1.7775,
503
- "slid_loss": 1.9013,
504
- "step": 62,
505
- "time": 83.44
506
- },
507
- {
508
- "epoch": 0.49,
509
- "learning_rate": "5.0000e-06",
510
- "loss": 1.7193,
511
- "slid_loss": 1.8984,
512
- "step": 63,
513
- "time": 87.14
514
- },
515
- {
516
- "epoch": 0.5,
517
- "learning_rate": "5.0000e-06",
518
- "loss": 1.7909,
519
- "slid_loss": 1.8967,
520
- "step": 64,
521
- "time": 83.19
522
- },
523
- {
524
- "epoch": 0.51,
525
- "learning_rate": "5.0000e-06",
526
- "loss": 1.7992,
527
- "slid_loss": 1.8952,
528
- "step": 65,
529
- "time": 80.02
530
- },
531
- {
532
- "epoch": 0.52,
533
- "learning_rate": "5.0000e-06",
534
- "loss": 1.8568,
535
- "slid_loss": 1.8947,
536
- "step": 66,
537
- "time": 82.08
538
- },
539
- {
540
- "epoch": 0.53,
541
- "learning_rate": "5.0000e-06",
542
- "loss": 1.8153,
543
- "slid_loss": 1.8935,
544
- "step": 67,
545
- "time": 81.18
546
- },
547
- {
548
- "epoch": 0.53,
549
- "learning_rate": "5.0000e-06",
550
- "loss": 1.8354,
551
- "slid_loss": 1.8926,
552
- "step": 68,
553
- "time": 80.32
554
- },
555
- {
556
- "epoch": 0.54,
557
- "learning_rate": "5.0000e-06",
558
- "loss": 1.8226,
559
- "slid_loss": 1.8916,
560
- "step": 69,
561
- "time": 80.34
562
- },
563
- {
564
- "epoch": 0.55,
565
- "learning_rate": "5.0000e-06",
566
- "loss": 1.7428,
567
- "slid_loss": 1.8895,
568
- "step": 70,
569
- "time": 81.95
570
- },
571
- {
572
- "epoch": 0.56,
573
- "learning_rate": "5.0000e-06",
574
- "loss": 1.7535,
575
- "slid_loss": 1.8876,
576
- "step": 71,
577
- "time": 79.71
578
- },
579
- {
580
- "epoch": 0.57,
581
- "learning_rate": "5.0000e-06",
582
- "loss": 1.7228,
583
- "slid_loss": 1.8853,
584
- "step": 72,
585
- "time": 81.39
586
- },
587
- {
588
- "epoch": 0.57,
589
- "learning_rate": "5.0000e-06",
590
- "loss": 1.768,
591
- "slid_loss": 1.8837,
592
- "step": 73,
593
- "time": 83.67
594
- },
595
- {
596
- "epoch": 0.58,
597
- "learning_rate": "5.0000e-06",
598
- "loss": 1.8065,
599
- "slid_loss": 1.8826,
600
- "step": 74,
601
- "time": 84.41
602
- },
603
- {
604
- "epoch": 0.59,
605
- "learning_rate": "5.0000e-06",
606
- "loss": 1.8008,
607
- "slid_loss": 1.8815,
608
- "step": 75,
609
- "time": 80.91
610
- },
611
- {
612
- "epoch": 0.6,
613
- "learning_rate": "5.0000e-06",
614
- "loss": 1.8525,
615
- "slid_loss": 1.8812,
616
- "step": 76,
617
- "time": 84.8
618
- },
619
- {
620
- "epoch": 0.6,
621
- "learning_rate": "5.0000e-06",
622
- "loss": 1.7307,
623
- "slid_loss": 1.8792,
624
- "step": 77,
625
- "time": 78.98
626
- },
627
- {
628
- "epoch": 0.61,
629
- "learning_rate": "5.0000e-06",
630
- "loss": 1.7338,
631
- "slid_loss": 1.8773,
632
- "step": 78,
633
- "time": 84.67
634
- },
635
- {
636
- "epoch": 0.62,
637
- "learning_rate": "5.0000e-06",
638
- "loss": 1.7968,
639
- "slid_loss": 1.8763,
640
- "step": 79,
641
- "time": 78.49
642
- },
643
- {
644
- "epoch": 0.63,
645
- "learning_rate": "5.0000e-06",
646
- "loss": 1.7787,
647
- "slid_loss": 1.8751,
648
- "step": 80,
649
- "time": 82.89
650
- },
651
- {
652
- "epoch": 0.64,
653
- "learning_rate": "5.0000e-06",
654
- "loss": 1.779,
655
- "slid_loss": 1.8739,
656
- "step": 81,
657
- "time": 78.23
658
- },
659
- {
660
- "epoch": 0.64,
661
- "learning_rate": "5.0000e-06",
662
- "loss": 1.7907,
663
- "slid_loss": 1.8729,
664
- "step": 82,
665
- "time": 81.05
666
- },
667
- {
668
- "epoch": 0.65,
669
- "learning_rate": "5.0000e-06",
670
- "loss": 1.7231,
671
- "slid_loss": 1.8711,
672
- "step": 83,
673
- "time": 79.99
674
- },
675
- {
676
- "epoch": 0.66,
677
- "learning_rate": "5.0000e-06",
678
- "loss": 1.7397,
679
- "slid_loss": 1.8695,
680
- "step": 84,
681
- "time": 79.98
682
- },
683
- {
684
- "epoch": 0.67,
685
- "learning_rate": "5.0000e-06",
686
- "loss": 1.7482,
687
- "slid_loss": 1.8681,
688
- "step": 85,
689
- "time": 79.09
690
- },
691
- {
692
- "epoch": 0.68,
693
- "learning_rate": "5.0000e-06",
694
- "loss": 1.7731,
695
- "slid_loss": 1.867,
696
- "step": 86,
697
- "time": 76.58
698
- },
699
- {
700
- "epoch": 0.68,
701
- "learning_rate": "5.0000e-06",
702
- "loss": 1.8358,
703
- "slid_loss": 1.8666,
704
- "step": 87,
705
- "time": 81.05
706
- },
707
- {
708
- "epoch": 0.69,
709
- "learning_rate": "5.0000e-06",
710
- "loss": 1.7569,
711
- "slid_loss": 1.8654,
712
- "step": 88,
713
- "time": 78.52
714
- },
715
- {
716
- "epoch": 0.7,
717
- "learning_rate": "5.0000e-06",
718
- "loss": 1.7772,
719
- "slid_loss": 1.8644,
720
- "step": 89,
721
- "time": 80.4
722
- },
723
- {
724
- "epoch": 0.71,
725
- "learning_rate": "5.0000e-06",
726
- "loss": 1.784,
727
- "slid_loss": 1.8635,
728
- "step": 90,
729
- "time": 86.12
730
- },
731
- {
732
- "epoch": 0.71,
733
- "learning_rate": "5.0000e-06",
734
- "loss": 1.7401,
735
- "slid_loss": 1.8621,
736
- "step": 91,
737
- "time": 80.56
738
- },
739
- {
740
- "epoch": 0.72,
741
- "learning_rate": "5.0000e-06",
742
- "loss": 1.7645,
743
- "slid_loss": 1.8611,
744
- "step": 92,
745
- "time": 76.77
746
- },
747
- {
748
- "epoch": 0.73,
749
- "learning_rate": "5.0000e-06",
750
- "loss": 1.7251,
751
- "slid_loss": 1.8596,
752
- "step": 93,
753
- "time": 80.72
754
- },
755
- {
756
- "epoch": 0.74,
757
- "learning_rate": "5.0000e-06",
758
- "loss": 1.792,
759
- "slid_loss": 1.8589,
760
- "step": 94,
761
- "time": 80.45
762
- },
763
- {
764
- "epoch": 0.75,
765
- "learning_rate": "5.0000e-06",
766
- "loss": 1.7834,
767
- "slid_loss": 1.8581,
768
- "step": 95,
769
- "time": 80.28
770
- },
771
- {
772
- "epoch": 0.75,
773
- "learning_rate": "5.0000e-06",
774
- "loss": 1.7851,
775
- "slid_loss": 1.8574,
776
- "step": 96,
777
- "time": 81.67
778
- },
779
- {
780
- "epoch": 0.76,
781
- "learning_rate": "5.0000e-06",
782
- "loss": 1.8255,
783
- "slid_loss": 1.857,
784
- "step": 97,
785
- "time": 77.73
786
- },
787
- {
788
- "epoch": 0.77,
789
- "learning_rate": "5.0000e-06",
790
- "loss": 1.8322,
791
- "slid_loss": 1.8568,
792
- "step": 98,
793
- "time": 79.82
794
- },
795
- {
796
- "epoch": 0.78,
797
- "learning_rate": "5.0000e-06",
798
- "loss": 1.7494,
799
- "slid_loss": 1.8557,
800
- "step": 99,
801
- "time": 83.73
802
- },
803
- {
804
- "epoch": 0.79,
805
- "learning_rate": "5.0000e-06",
806
- "loss": 1.8002,
807
- "slid_loss": 1.8551,
808
- "step": 100,
809
- "time": 80.04
810
- },
811
- {
812
- "epoch": 0.79,
813
- "learning_rate": "5.0000e-06",
814
- "loss": 1.6975,
815
- "slid_loss": 1.841,
816
- "step": 101,
817
- "time": 205.33
818
- },
819
- {
820
- "epoch": 0.8,
821
- "learning_rate": "5.0000e-06",
822
- "loss": 1.792,
823
- "slid_loss": 1.8306,
824
- "step": 102,
825
- "time": 86.16
826
- },
827
- {
828
- "epoch": 0.81,
829
- "learning_rate": "5.0000e-06",
830
- "loss": 1.8129,
831
- "slid_loss": 1.8201,
832
- "step": 103,
833
- "time": 80.84
834
- },
835
- {
836
- "epoch": 0.82,
837
- "learning_rate": "5.0000e-06",
838
- "loss": 1.7504,
839
- "slid_loss": 1.8121,
840
- "step": 104,
841
- "time": 79.26
842
- },
843
- {
844
- "epoch": 0.82,
845
- "learning_rate": "5.0000e-06",
846
- "loss": 1.688,
847
- "slid_loss": 1.8031,
848
- "step": 105,
849
- "time": 99.38
850
- },
851
- {
852
- "epoch": 0.83,
853
- "learning_rate": "5.0000e-06",
854
- "loss": 1.8118,
855
- "slid_loss": 1.8004,
856
- "step": 106,
857
- "time": 85.96
858
- },
859
- {
860
- "epoch": 0.84,
861
- "learning_rate": "5.0000e-06",
862
- "loss": 1.7048,
863
- "slid_loss": 1.7965,
864
- "step": 107,
865
- "time": 84.68
866
- },
867
- {
868
- "epoch": 0.85,
869
- "learning_rate": "5.0000e-06",
870
- "loss": 1.762,
871
- "slid_loss": 1.7949,
872
- "step": 108,
873
- "time": 79.12
874
- },
875
- {
876
- "epoch": 0.86,
877
- "learning_rate": "5.0000e-06",
878
- "loss": 1.799,
879
- "slid_loss": 1.7936,
880
- "step": 109,
881
- "time": 85.23
882
- },
883
- {
884
- "epoch": 0.86,
885
- "learning_rate": "5.0000e-06",
886
- "loss": 1.7582,
887
- "slid_loss": 1.793,
888
- "step": 110,
889
- "time": 85.47
890
- },
891
- {
892
- "epoch": 0.87,
893
- "learning_rate": "5.0000e-06",
894
- "loss": 1.7241,
895
- "slid_loss": 1.7919,
896
- "step": 111,
897
- "time": 81.73
898
- },
899
- {
900
- "epoch": 0.88,
901
- "learning_rate": "5.0000e-06",
902
- "loss": 1.7806,
903
- "slid_loss": 1.7917,
904
- "step": 112,
905
- "time": 85.27
906
- },
907
- {
908
- "epoch": 0.89,
909
- "learning_rate": "5.0000e-06",
910
- "loss": 1.7839,
911
- "slid_loss": 1.7909,
912
- "step": 113,
913
- "time": 78.94
914
- },
915
- {
916
- "epoch": 0.9,
917
- "learning_rate": "5.0000e-06",
918
- "loss": 1.789,
919
- "slid_loss": 1.7907,
920
- "step": 114,
921
- "time": 80.05
922
- },
923
- {
924
- "epoch": 0.9,
925
- "learning_rate": "5.0000e-06",
926
- "loss": 1.7831,
927
- "slid_loss": 1.7912,
928
- "step": 115,
929
- "time": 82.11
930
- },
931
- {
932
- "epoch": 0.91,
933
- "learning_rate": "5.0000e-06",
934
- "loss": 1.7619,
935
- "slid_loss": 1.7897,
936
- "step": 116,
937
- "time": 78.06
938
- },
939
- {
940
- "epoch": 0.92,
941
- "learning_rate": "5.0000e-06",
942
- "loss": 1.8384,
943
- "slid_loss": 1.7901,
944
- "step": 117,
945
- "time": 77.8
946
- },
947
- {
948
- "epoch": 0.93,
949
- "learning_rate": "5.0000e-06",
950
- "loss": 1.78,
951
- "slid_loss": 1.7904,
952
- "step": 118,
953
- "time": 81.19
954
- },
955
- {
956
- "epoch": 0.93,
957
- "learning_rate": "5.0000e-06",
958
- "loss": 1.7805,
959
- "slid_loss": 1.7901,
960
- "step": 119,
961
- "time": 83.57
962
- },
963
- {
964
- "epoch": 0.94,
965
- "learning_rate": "5.0000e-06",
966
- "loss": 1.7509,
967
- "slid_loss": 1.7899,
968
- "step": 120,
969
- "time": 78.54
970
- },
971
- {
972
- "epoch": 0.95,
973
- "learning_rate": "5.0000e-06",
974
- "loss": 1.7806,
975
- "slid_loss": 1.7897,
976
- "step": 121,
977
- "time": 83.97
978
- },
979
- {
980
- "epoch": 0.96,
981
- "learning_rate": "5.0000e-06",
982
- "loss": 1.7887,
983
- "slid_loss": 1.7899,
984
- "step": 122,
985
- "time": 83.74
986
- },
987
- {
988
- "epoch": 0.97,
989
- "learning_rate": "5.0000e-06",
990
- "loss": 1.7084,
991
- "slid_loss": 1.7893,
992
- "step": 123,
993
- "time": 78.85
994
- },
995
- {
996
- "epoch": 0.97,
997
- "learning_rate": "5.0000e-06",
998
- "loss": 1.7843,
999
- "slid_loss": 1.7889,
1000
- "step": 124,
1001
- "time": 79.91
1002
- },
1003
- {
1004
- "epoch": 0.98,
1005
- "learning_rate": "5.0000e-06",
1006
- "loss": 1.8215,
1007
- "slid_loss": 1.788,
1008
- "step": 125,
1009
- "time": 77.9
1010
- },
1011
- {
1012
- "epoch": 0.99,
1013
- "learning_rate": "5.0000e-06",
1014
- "loss": 1.8352,
1015
- "slid_loss": 1.7881,
1016
- "step": 126,
1017
- "time": 82.06
1018
- },
1019
- {
1020
- "epoch": 1.0,
1021
- "learning_rate": "5.0000e-06",
1022
- "loss": 1.7802,
1023
- "slid_loss": 1.7868,
1024
- "step": 127,
1025
- "time": 80.15
1026
- },
1027
- {
1028
- "epoch": 1.0,
1029
- "step": 127,
1030
- "time": 0.01,
1031
- "total_flos": 0.0,
1032
- "train_loss": 1.837487073395196,
1033
- "train_runtime": 10569.7638,
1034
- "train_samples_per_second": 12.332,
1035
- "train_steps_per_second": 0.012
1036
- }
1037
- ],
1038
- "logging_steps": 1.0,
1039
- "max_steps": 127,
1040
- "num_train_epochs": 1,
1041
- "save_steps": 50,
1042
- "total_flos": 0.0,
1043
- "trial_name": null,
1044
- "trial_params": null
1045
- }