emilios committed on
Commit
c93577b
1 Parent(s): 09ed481
Files changed (21) hide show
  1. checkpoint-9000 +1 -0
  2. ds_config.json.not +50 -0
  3. ds_config.json.orig +50 -0
  4. pytorch_model.bin +1 -1
  5. run.sh +3 -3
  6. runs/Dec22_09-38-38_129-146-176-120/events.out.tfevents.1671701963.129-146-176-120.830862.0 +2 -2
  7. runs/Dec22_14-10-41_129-146-176-120/1671718288.5083408/events.out.tfevents.1671718288.129-146-176-120.835630.1 +3 -0
  8. runs/Dec22_14-10-41_129-146-176-120/events.out.tfevents.1671718288.129-146-176-120.835630.0 +3 -0
  9. runs/Dec22_14-15-43_129-146-176-120/1671718590.3176517/events.out.tfevents.1671718590.129-146-176-120.836086.1 +3 -0
  10. runs/Dec22_14-15-43_129-146-176-120/events.out.tfevents.1671718590.129-146-176-120.836086.0 +3 -0
  11. runs/Dec22_14-19-53_129-146-176-120/1671718840.647013/events.out.tfevents.1671718840.129-146-176-120.836864.1 +3 -0
  12. runs/Dec22_14-19-53_129-146-176-120/events.out.tfevents.1671718840.129-146-176-120.836864.0 +3 -0
  13. runs/Dec22_14-21-56_129-146-176-120/1671718963.776668/events.out.tfevents.1671718963.129-146-176-120.837337.1 +3 -0
  14. runs/Dec22_14-21-56_129-146-176-120/events.out.tfevents.1671718963.129-146-176-120.837337.0 +3 -0
  15. runs/Dec22_16-56-17_129-146-176-120/1671728230.4621089/events.out.tfevents.1671728230.129-146-176-120.840536.1 +3 -0
  16. runs/Dec22_16-56-17_129-146-176-120/events.out.tfevents.1671728230.129-146-176-120.840536.0 +3 -0
  17. runs/Dec22_17-04-43_129-146-176-120/1671728738.1289852/events.out.tfevents.1671728738.129-146-176-120.841203.1 +3 -0
  18. runs/Dec22_17-04-43_129-146-176-120/events.out.tfevents.1671728738.129-146-176-120.841203.0 +3 -0
  19. runs/Dec22_17-37-10_129-146-176-120/1671730676.7038546/events.out.tfevents.1671730676.129-146-176-120.841810.1 +3 -0
  20. runs/Dec22_17-37-10_129-146-176-120/events.out.tfevents.1671730676.129-146-176-120.841810.0 +3 -0
  21. training_args.bin +1 -1
checkpoint-9000 ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-12000
ds_config.json.not ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "optimizer": {
12
+ "type": "AdamW",
13
+ "params": {
14
+ "lr": "auto",
15
+ "betas": "auto",
16
+ "eps": "auto",
17
+ "weight_decay": "auto"
18
+ }
19
+ },
20
+
21
+ "scheduler": {
22
+ "type": "OneCycle",
23
+ "params": {
24
+ "last_batch_iteration": -1,
25
+ "total_num_steps": "auto",
26
+ "warmup_min_lr": "auto",
27
+ "warmup_max_lr": "auto",
28
+ "warmup_num_steps": "auto"
29
+ }
30
+ },
31
+
32
+ "zero_optimization": {
33
+ "stage": 2,
34
+ "offload_optimizer": {
35
+ "device": "cpu",
36
+ "pin_memory": true
37
+ },
38
+ "allgather_partitions": true,
39
+ "allgather_bucket_size": 2e8,
40
+ "overlap_comm": true,
41
+ "reduce_scatter": true,
42
+ "reduce_bucket_size": 2e8,
43
+ "contiguous_gradients": true
44
+ },
45
+
46
+ "gradient_accumulation_steps": "auto",
47
+ "gradient_clipping": "auto",
48
+ "train_batch_size": "auto",
49
+ "train_micro_batch_size_per_gpu": "auto"
50
+ }
ds_config.json.orig ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {
3
+ "enabled": "auto",
4
+ "loss_scale": 0,
5
+ "loss_scale_window": 1000,
6
+ "initial_scale_power": 16,
7
+ "hysteresis": 2,
8
+ "min_loss_scale": 1
9
+ },
10
+
11
+ "optimizer": {
12
+ "type": "AdamW",
13
+ "params": {
14
+ "lr": "auto",
15
+ "betas": "auto",
16
+ "eps": "auto",
17
+ "weight_decay": "auto"
18
+ }
19
+ },
20
+
21
+ "scheduler": {
22
+ "type": "WarmupDecayLR",
23
+ "params": {
24
+ "last_batch_iteration": -1,
25
+ "total_num_steps": "auto",
26
+ "warmup_min_lr": "auto",
27
+ "warmup_max_lr": "auto",
28
+ "warmup_num_steps": "auto"
29
+ }
30
+ },
31
+
32
+ "zero_optimization": {
33
+ "stage": 2,
34
+ "offload_optimizer": {
35
+ "device": "cpu",
36
+ "pin_memory": true
37
+ },
38
+ "allgather_partitions": true,
39
+ "allgather_bucket_size": 2e8,
40
+ "overlap_comm": true,
41
+ "reduce_scatter": true,
42
+ "reduce_bucket_size": 2e8,
43
+ "contiguous_gradients": true
44
+ },
45
+
46
+ "gradient_accumulation_steps": "auto",
47
+ "gradient_clipping": "auto",
48
+ "train_batch_size": "auto",
49
+ "train_micro_batch_size_per_gpu": "auto"
50
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0083259db3b58a2a002a48614b01db92f9a0d63a6d02a7aeff5ba6e221b37e9a
3
  size 1527847357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da3cc32424000ff954bf49215879ed2e1a0d4eaab55388c0687d6ddcca9269e4
3
  size 1527847357
run.sh CHANGED
@@ -10,13 +10,13 @@ deepspeed run-ba.py \
10
  --text_column_name="sentence" \
11
  --streaming="False" \
12
  --ignore_data_skip \
13
- --resume_from_checkpoint="checkpoint-18000" \
14
  --torch_compile="True" \
15
  --torch_compile_mode="reduce-overhead" \
16
  --torch_compile_mode="max-autotune" \
17
  --logging_steps="25" \
18
- --learning_rate="4e-4" \
19
- --max_steps="18000" \
20
  --output_dir="./" \
21
  --per_device_train_batch_size="32" \
22
  --gradient_accumulation_steps="1" \
 
10
  --text_column_name="sentence" \
11
  --streaming="False" \
12
  --ignore_data_skip \
13
+ --resume_from_checkpoint="checkpoint-19000" \
14
  --torch_compile="True" \
15
  --torch_compile_mode="reduce-overhead" \
16
  --torch_compile_mode="max-autotune" \
17
  --logging_steps="25" \
18
+ --learning_rate="3e-6" \
19
+ --max_steps="19000" \
20
  --output_dir="./" \
21
  --per_device_train_batch_size="32" \
22
  --gradient_accumulation_steps="1" \
runs/Dec22_09-38-38_129-146-176-120/events.out.tfevents.1671701963.129-146-176-120.830862.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ffcecc9660bb3d4c00e6ad3958a85ea665cc118c17ec5d5e6d0a9311cf60ec2
3
- size 30917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a0ec69676229e7bc70c0d6e6c94e611da3c7a90c0c1fa9747a91d9e37a4d74c
3
+ size 32037
runs/Dec22_14-10-41_129-146-176-120/1671718288.5083408/events.out.tfevents.1671718288.129-146-176-120.835630.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4f6f2a4beb435019093dbcd4c8abb32e696efe2dff30b9eaeb913c14416c12
3
+ size 5905
runs/Dec22_14-10-41_129-146-176-120/events.out.tfevents.1671718288.129-146-176-120.835630.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a513df02fbbacc5d41fbb60427a81d7c8c33d56bae2578bc969856641b3ea21a
3
+ size 4678
runs/Dec22_14-15-43_129-146-176-120/1671718590.3176517/events.out.tfevents.1671718590.129-146-176-120.836086.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:950a76771993ccc841ccffde75e5143e8b0cf5edc76fe70c4702c9b5ed6030ee
3
+ size 5905
runs/Dec22_14-15-43_129-146-176-120/events.out.tfevents.1671718590.129-146-176-120.836086.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f379df2d89232e9bb9dd50e2f178d9a94e83819c9f0f81ebe5b5ee6bf90219c1
3
+ size 4678
runs/Dec22_14-19-53_129-146-176-120/1671718840.647013/events.out.tfevents.1671718840.129-146-176-120.836864.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:828dc53a152bb05b8f428b4ab65624341c7d0f3116a9002121814941a0fcae10
3
+ size 5905
runs/Dec22_14-19-53_129-146-176-120/events.out.tfevents.1671718840.129-146-176-120.836864.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b685793e7fe2ed760f913188c41ae0f3610de9c5cb263d7f206ce162487bbb8c
3
+ size 4678
runs/Dec22_14-21-56_129-146-176-120/1671718963.776668/events.out.tfevents.1671718963.129-146-176-120.837337.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e58cedb662a8b9a2a8c9ef5c515126c3d6102d10f858353515ddca18667967
3
+ size 5905
runs/Dec22_14-21-56_129-146-176-120/events.out.tfevents.1671718963.129-146-176-120.837337.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a15d31a27be449c45bd148756805bec717a045e31ff0090abde0520ac266e2bd
3
+ size 11042
runs/Dec22_16-56-17_129-146-176-120/1671728230.4621089/events.out.tfevents.1671728230.129-146-176-120.840536.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e86f74eabb164e217396adb3862926f83e2c8a446b10cb92fda92f0f9fc5532
3
+ size 5905
runs/Dec22_16-56-17_129-146-176-120/events.out.tfevents.1671728230.129-146-176-120.840536.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e264c48998d67d6ca5bca594f64f055d87f2a97102c8d89fd4b70a7c9db61850
3
+ size 4638
runs/Dec22_17-04-43_129-146-176-120/1671728738.1289852/events.out.tfevents.1671728738.129-146-176-120.841203.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce53ac1d02a6acf4997476350f245b82bee6f7f26233d548a4a3a5085713ae3a
3
+ size 5905
runs/Dec22_17-04-43_129-146-176-120/events.out.tfevents.1671728738.129-146-176-120.841203.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe60c4ccd276569fd01d5fa92ffc2a944bec97d261b1125b67113834c9996b3
3
+ size 7357
runs/Dec22_17-37-10_129-146-176-120/1671730676.7038546/events.out.tfevents.1671730676.129-146-176-120.841810.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:895cbc5c6787d31e9e56c0417e517f2e43c1f63caea7e530091f1a1fe6482c68
3
+ size 5905
runs/Dec22_17-37-10_129-146-176-120/events.out.tfevents.1671730676.129-146-176-120.841810.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e0820296075c022e4d6cd6f4a652cbfcd2060afb34384706c6a0e277cbf55ba
3
+ size 4677
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6e3ac4aeab20cf895e188b7a0ae60077219ad0067d587dfa1da35e123e14fa0
3
  size 4795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdea1c10d1f1ed110f88766aa9ad66df2cba1add4701d0d9538b9250ae9331e7
3
  size 4795