DuongTrongChi commited on
Commit
b664cb2
1 Parent(s): dd5d6da

Training in progress, step 83, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65c76ec859b365599be866d9b318eaa5a3c0499ba81d80d398aa979cdfba1c6c
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c997ba06eb3c43b4beeaddc7da4021790886b44b0d7da5f2d90a4488a82d9d66
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e9b8c9e59c83006ff910343ed6eca79ef202fa67271f9cc6d22fd7e1e81e005
3
  size 50675156
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e5a5fa2804c3f250c6b3046be48e9a28dc91a2bd321a4a53cebc3472ff01ca
3
  size 50675156
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca3ec25a8057e7278bf9e7cd6eb504c67f0a3c505cd58623e3273b98ac77f202
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6c2c803e63719a38ab78de645a182ebd31fb27bd73de5f12bc1f2831c268ff7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2194787379972565,
5
  "eval_steps": 500,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -567,6 +567,27 @@
567
  "learning_rate": 0.00016,
568
  "loss": 1.1684,
569
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
570
  }
571
  ],
572
  "logging_steps": 1,
@@ -586,7 +607,7 @@
586
  "attributes": {}
587
  }
588
  },
589
- "total_flos": 2.29379718260736e+17,
590
  "train_batch_size": 16,
591
  "trial_name": null,
592
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.22770919067215364,
5
  "eval_steps": 500,
6
+ "global_step": 83,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
567
  "learning_rate": 0.00016,
568
  "loss": 1.1684,
569
  "step": 80
570
+ },
571
+ {
572
+ "epoch": 0.2222222222222222,
573
+ "grad_norm": 0.06477335095405579,
574
+ "learning_rate": 0.000162,
575
+ "loss": 1.1851,
576
+ "step": 81
577
+ },
578
+ {
579
+ "epoch": 0.22496570644718794,
580
+ "grad_norm": 0.0677405372262001,
581
+ "learning_rate": 0.000164,
582
+ "loss": 1.093,
583
+ "step": 82
584
+ },
585
+ {
586
+ "epoch": 0.22770919067215364,
587
+ "grad_norm": 0.06988447159528732,
588
+ "learning_rate": 0.000166,
589
+ "loss": 1.2542,
590
+ "step": 83
591
  }
592
  ],
593
  "logging_steps": 1,
 
607
  "attributes": {}
608
  }
609
  },
610
+ "total_flos": 2.3801435722727424e+17,
611
  "train_batch_size": 16,
612
  "trial_name": null,
613
  "trial_params": null