DuongTrongChi commited on
Commit
333e818
1 Parent(s): 8b76147

Training in progress, step 77, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba76b36e590e7e13470a157729235b7d34fd5f65cd77648e42ec57c79892a5d
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb0496349c7b965d5745b83667dddba6d9df375bcaa24821f880398882683c0
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c48dc9c8c4cf094cfe9ca510b0ee6256aee9062c39b4df8bc829e031d6c096d6
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0888da8e641e6eaedf98307dcd8e2a8af3dfb612ef7f810d4e6ca8bcadf0e4e
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a86174539a86c9c10ef9b72608d49039cdad58e77dd25141a021b27f07e927
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725ee499aaa0bc04b490ac3af0c734c514c976dd8cd2f204b00fdb43d2a90bf8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2030178326474623,
5
  "eval_steps": 500,
6
- "global_step": 74,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -525,6 +525,27 @@
525
  "learning_rate": 0.000148,
526
  "loss": 1.1915,
527
  "step": 74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
528
  }
529
  ],
530
  "logging_steps": 1,
@@ -544,7 +565,7 @@
544
  "attributes": {}
545
  }
546
  },
547
- "total_flos": 2.1144350488087757e+17,
548
  "train_batch_size": 16,
549
  "trial_name": null,
550
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2112482853223594,
5
  "eval_steps": 500,
6
+ "global_step": 77,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
525
  "learning_rate": 0.000148,
526
  "loss": 1.1915,
527
  "step": 74
528
+ },
529
+ {
530
+ "epoch": 0.205761316872428,
531
+ "grad_norm": 0.0682872086763382,
532
+ "learning_rate": 0.00015000000000000001,
533
+ "loss": 1.2017,
534
+ "step": 75
535
+ },
536
+ {
537
+ "epoch": 0.2085048010973937,
538
+ "grad_norm": 0.07075867056846619,
539
+ "learning_rate": 0.000152,
540
+ "loss": 1.1562,
541
+ "step": 76
542
+ },
543
+ {
544
+ "epoch": 0.2112482853223594,
545
+ "grad_norm": 0.06364033371210098,
546
+ "learning_rate": 0.000154,
547
+ "loss": 1.1936,
548
+ "step": 77
549
  }
550
  ],
551
  "logging_steps": 1,
 
565
  "attributes": {}
566
  }
567
  },
568
+ "total_flos": 2.202346458742948e+17,
569
  "train_batch_size": 16,
570
  "trial_name": null,
571
  "trial_params": null