nadahlberg committed
Commit 8620392
Parent(s): 494d95c

Training in progress, step 196000, checkpoint
Files changed:
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1411 -3
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f9464f6573ad9fd0fe2e6b58a6ea1736b5ba18805ac93432a1bb8af13c8d7174
 size 325690872
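This and each of the checkpoint files below are stored with Git LFS, so the diff is over a three-line pointer file (version, oid, size) rather than the binary contents. A minimal sketch of what such a pointer encodes, using the pointer text from this commit (the helper name is ours, not part of the repo):

import json

def parse_lfs_pointer(text: str) -> dict:
    # A Git LFS pointer is plain "key value" lines:
    # version (spec URL), oid (sha256:<hex digest>), size (bytes).
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:f9464f6573ad9fd0fe2e6b58a6ea1736b5ba18805ac93432a1bb8af13c8d7174
size 325690872"""
print(json.dumps(parse_lfs_pointer(pointer), indent=2))
# oid carries the content hash; size lets clients preallocate/verify length.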
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:654add82a11ead5fc284c78b157de8301575e0da3004fc15763bfafcf3e21317
 size 651550778
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6b0c6e8b9d02c7e6dd2be7b0f7ff77b7a7ace2a942f56de98bb049da9978609d
 size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:858fad4a57c20e3bc6f87991b2d51b447444dda687b4fac3b65e93012d0b92cd
 size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:76f6e9796e8b0ab01b947c4c554b7dfb713baa7446393d60bcd9dc2794008bb7
 size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e1c1cfbbca38bba0f754721006c8a7b6bd2f8ef74bde2398b49043aeae06b4a9
 size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b3fde9f19aef78c9d1abf97fb0d1ff70877dbd6c047afe3d95a62bc334beb08d
 size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bba636fe487fa05c4f12884126f347d92998e10436b775c61ed62a0845edaee6
 size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6f6eac7b111b6403d7a0511e9ed7b6c204f504a39330847af53892c04cfc0c5e
 size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:29d7b0fd82697b0b10cee8d8f5187a1d86168f8dae62bf20c5fcfda6edc689db
 size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3168476b794df8c7f905068d035500e7de63c824f43b21661ba9cef3202443fe
 size 1064
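All eleven pointer updates above follow the same pattern: only the oid changes while the size stays fixed, which is what overwriting a checkpoint in place looks like through LFS. A minimal sketch for verifying a downloaded file against its pointer's oid (path and expected digest taken from the model.safetensors diff above; assumes the checkpoint has been fetched locally):

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in 1 MiB chunks so large shards need not fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

expected = "f9464f6573ad9fd0fe2e6b58a6ea1736b5ba18805ac93432a1bb8af13c8d7174"
assert sha256_of("last-checkpoint/model.safetensors") == expected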
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.98,
   "eval_steps": 2000,
-  "global_step":
+  "global_step": 196000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -136583,6 +136583,1414 @@
       "eval_samples_per_second": 53.049,
       "eval_steps_per_second": 0.106,
       "step": 194000
+    },
+    {"epoch": 0.97005, "grad_norm": 0.62890625, "learning_rate": 9.030150753768845e-05, "loss": 2.0384, "step": 194010},
+    {"epoch": 0.9701, "grad_norm": 0.6796875, "learning_rate": 9.015075376884423e-05, "loss": 2.0706, "step": 194020},
+    {"epoch": 0.97015, "grad_norm": 0.53125, "learning_rate": 8.999999999999999e-05, "loss": 2.0821, "step": 194030},
+    {"epoch": 0.9702, "grad_norm": 0.578125, "learning_rate": 8.984924623115577e-05, "loss": 2.1075, "step": 194040},
+    {"epoch": 0.97025, "grad_norm": 0.6328125, "learning_rate": 8.969849246231155e-05, "loss": 2.0734, "step": 194050},
+    {"epoch": 0.9703, "grad_norm": 0.62109375, "learning_rate": 8.954773869346733e-05, "loss": 2.0926, "step": 194060},
+    {"epoch": 0.97035, "grad_norm": 0.65625, "learning_rate": 8.939698492462311e-05, "loss": 2.1112, "step": 194070},
+    {"epoch": 0.9704, "grad_norm": 0.671875, "learning_rate": 8.924623115577889e-05, "loss": 2.0451, "step": 194080},
+    {"epoch": 0.97045, "grad_norm": 0.59375, "learning_rate": 8.909547738693467e-05, "loss": 2.0717, "step": 194090},
+    {"epoch": 0.9705, "grad_norm": 0.6796875, "learning_rate": 8.894472361809045e-05, "loss": 2.0684, "step": 194100},
+    {"epoch": 0.97055, "grad_norm": 0.61328125, "learning_rate": 8.879396984924623e-05, "loss": 2.1276, "step": 194110},
+    {"epoch": 0.9706, "grad_norm": 0.74609375, "learning_rate": 8.864321608040201e-05, "loss": 2.0602, "step": 194120},
+    {"epoch": 0.97065, "grad_norm": 0.61328125, "learning_rate": 8.849246231155779e-05, "loss": 2.1114, "step": 194130},
+    {"epoch": 0.9707, "grad_norm": 0.609375, "learning_rate": 8.834170854271357e-05, "loss": 2.0791, "step": 194140},
+    {"epoch": 0.97075, "grad_norm": 0.59765625, "learning_rate": 8.819095477386935e-05, "loss": 2.0548, "step": 194150},
+    {"epoch": 0.9708, "grad_norm": 0.55859375, "learning_rate": 8.804020100502513e-05, "loss": 2.0775, "step": 194160},
+    {"epoch": 0.97085, "grad_norm": 0.68359375, "learning_rate": 8.788944723618091e-05, "loss": 2.0738, "step": 194170},
+    {"epoch": 0.9709, "grad_norm": 0.61328125, "learning_rate": 8.773869346733669e-05, "loss": 2.1551, "step": 194180},
+    {"epoch": 0.97095, "grad_norm": 0.6328125, "learning_rate": 8.758793969849247e-05, "loss": 2.0596, "step": 194190},
+    {"epoch": 0.971, "grad_norm": 0.609375, "learning_rate": 8.743718592964825e-05, "loss": 2.0845, "step": 194200},
+    {"epoch": 0.97105, "grad_norm": 0.671875, "learning_rate": 8.728643216080403e-05, "loss": 2.0366, "step": 194210},
+    {"epoch": 0.9711, "grad_norm": 0.63671875, "learning_rate": 8.713567839195981e-05, "loss": 2.088, "step": 194220},
+    {"epoch": 0.97115, "grad_norm": 0.66015625, "learning_rate": 8.698492462311559e-05, "loss": 2.0873, "step": 194230},
+    {"epoch": 0.9712, "grad_norm": 0.5625, "learning_rate": 8.683417085427135e-05, "loss": 2.0475, "step": 194240},
+    {"epoch": 0.97125, "grad_norm": 0.64453125, "learning_rate": 8.668341708542713e-05, "loss": 2.066, "step": 194250},
+    {"epoch": 0.9713, "grad_norm": 0.625, "learning_rate": 8.653266331658291e-05, "loss": 2.0245, "step": 194260},
+    {"epoch": 0.97135, "grad_norm": 0.56640625, "learning_rate": 8.638190954773869e-05, "loss": 2.1035, "step": 194270},
+    {"epoch": 0.9714, "grad_norm": 0.63671875, "learning_rate": 8.623115577889447e-05, "loss": 2.0527, "step": 194280},
+    {"epoch": 0.97145, "grad_norm": 0.671875, "learning_rate": 8.608040201005025e-05, "loss": 2.063, "step": 194290},
+    {"epoch": 0.9715, "grad_norm": 0.671875, "learning_rate": 8.592964824120603e-05, "loss": 2.0161, "step": 194300},
+    {"epoch": 0.97155, "grad_norm": 0.61328125, "learning_rate": 8.577889447236181e-05, "loss": 2.1606, "step": 194310},
+    {"epoch": 0.9716, "grad_norm": 0.640625, "learning_rate": 8.562814070351759e-05, "loss": 2.0762, "step": 194320},
+    {"epoch": 0.97165, "grad_norm": 0.58984375, "learning_rate": 8.547738693467337e-05, "loss": 2.0266, "step": 194330},
+    {"epoch": 0.9717, "grad_norm": 0.6171875, "learning_rate": 8.532663316582915e-05, "loss": 2.1165, "step": 194340},
+    {"epoch": 0.97175, "grad_norm": 0.609375, "learning_rate": 8.517587939698493e-05, "loss": 2.0723, "step": 194350},
+    {"epoch": 0.9718, "grad_norm": 0.6171875, "learning_rate": 8.502512562814071e-05, "loss": 2.0379, "step": 194360},
+    {"epoch": 0.97185, "grad_norm": 0.64453125, "learning_rate": 8.487437185929649e-05, "loss": 2.0648, "step": 194370},
+    {"epoch": 0.9719, "grad_norm": 0.60546875, "learning_rate": 8.472361809045227e-05, "loss": 2.0479, "step": 194380},
+    {"epoch": 0.97195, "grad_norm": 0.5859375, "learning_rate": 8.457286432160805e-05, "loss": 2.0829, "step": 194390},
+    {"epoch": 0.972, "grad_norm": 0.57421875, "learning_rate": 8.442211055276383e-05, "loss": 2.0655, "step": 194400},
+    {"epoch": 0.97205, "grad_norm": 0.63671875, "learning_rate": 8.427135678391961e-05, "loss": 2.1013, "step": 194410},
+    {"epoch": 0.9721, "grad_norm": 0.66796875, "learning_rate": 8.412060301507539e-05, "loss": 2.0431, "step": 194420},
+    {"epoch": 0.97215, "grad_norm": 0.62890625, "learning_rate": 8.396984924623117e-05, "loss": 2.0636, "step": 194430},
+    {"epoch": 0.9722, "grad_norm": 0.58203125, "learning_rate": 8.381909547738693e-05, "loss": 2.0263, "step": 194440},
+    {"epoch": 0.97225, "grad_norm": 0.60546875, "learning_rate": 8.366834170854271e-05, "loss": 2.1033, "step": 194450},
+    {"epoch": 0.9723, "grad_norm": 0.62890625, "learning_rate": 8.351758793969849e-05, "loss": 2.0468, "step": 194460},
+    {"epoch": 0.97235, "grad_norm": 0.5625, "learning_rate": 8.336683417085427e-05, "loss": 2.0634, "step": 194470},
+    {"epoch": 0.9724, "grad_norm": 0.609375, "learning_rate": 8.321608040201005e-05, "loss": 2.0786, "step": 194480},
+    {"epoch": 0.97245, "grad_norm": 0.625, "learning_rate": 8.306532663316583e-05, "loss": 2.0466, "step": 194490},
+    {"epoch": 0.9725, "grad_norm": 0.65625, "learning_rate": 8.291457286432161e-05, "loss": 2.1189, "step": 194500},
+    {"epoch": 0.97255, "grad_norm": 0.63671875, "learning_rate": 8.276381909547738e-05, "loss": 2.0692, "step": 194510},
+    {"epoch": 0.9726, "grad_norm": 0.58984375, "learning_rate": 8.261306532663316e-05, "loss": 2.1415, "step": 194520},
+    {"epoch": 0.97265, "grad_norm": 0.65625, "learning_rate": 8.246231155778894e-05, "loss": 2.0286, "step": 194530},
+    {"epoch": 0.9727, "grad_norm": 0.5546875, "learning_rate": 8.231155778894472e-05, "loss": 2.1371, "step": 194540},
+    {"epoch": 0.97275, "grad_norm": 0.61328125, "learning_rate": 8.21608040201005e-05, "loss": 2.071, "step": 194550},
+    {"epoch": 0.9728, "grad_norm": 0.53515625, "learning_rate": 8.201005025125628e-05, "loss": 2.0851, "step": 194560},
+    {"epoch": 0.97285, "grad_norm": 0.57421875, "learning_rate": 8.185929648241206e-05, "loss": 2.0778, "step": 194570},
+    {"epoch": 0.9729, "grad_norm": 0.66796875, "learning_rate": 8.170854271356784e-05, "loss": 2.0634, "step": 194580},
+    {"epoch": 0.97295, "grad_norm": 0.62109375, "learning_rate": 8.155778894472362e-05, "loss": 2.1477, "step": 194590},
+    {"epoch": 0.973, "grad_norm": 0.6953125, "learning_rate": 8.14070351758794e-05, "loss": 2.0247, "step": 194600},
+    {"epoch": 0.97305, "grad_norm": 0.60546875, "learning_rate": 8.125628140703518e-05, "loss": 2.1606, "step": 194610},
+    {"epoch": 0.9731, "grad_norm": 0.6328125, "learning_rate": 8.110552763819096e-05, "loss": 2.0624, "step": 194620},
+    {"epoch": 0.97315, "grad_norm": 0.80859375, "learning_rate": 8.095477386934673e-05, "loss": 2.068, "step": 194630},
+    {"epoch": 0.9732, "grad_norm": 0.5859375, "learning_rate": 8.080402010050251e-05, "loss": 2.1179, "step": 194640},
+    {"epoch": 0.97325, "grad_norm": 0.5546875, "learning_rate": 8.06532663316583e-05, "loss": 2.0413, "step": 194650},
+    {"epoch": 0.9733, "grad_norm": 0.6484375, "learning_rate": 8.050251256281407e-05, "loss": 2.0631, "step": 194660},
+    {"epoch": 0.97335, "grad_norm": 0.64453125, "learning_rate": 8.035175879396985e-05, "loss": 2.0582, "step": 194670},
+    {"epoch": 0.9734, "grad_norm": 0.703125, "learning_rate": 8.020100502512563e-05, "loss": 2.1336, "step": 194680},
+    {"epoch": 0.97345, "grad_norm": 0.59765625, "learning_rate": 8.005025125628141e-05, "loss": 2.0992, "step": 194690},
+    {"epoch": 0.9735, "grad_norm": 0.61328125, "learning_rate": 7.989949748743718e-05, "loss": 2.1031, "step": 194700},
+    {"epoch": 0.97355, "grad_norm": 0.6171875, "learning_rate": 7.974874371859296e-05, "loss": 2.102, "step": 194710},
+    {"epoch": 0.9736, "grad_norm": 0.6171875, "learning_rate": 7.959798994974874e-05, "loss": 2.0675, "step": 194720},
+    {"epoch": 0.97365, "grad_norm": 0.58984375, "learning_rate": 7.944723618090452e-05, "loss": 2.0498, "step": 194730},
+    {"epoch": 0.9737, "grad_norm": 0.64453125, "learning_rate": 7.92964824120603e-05, "loss": 2.0321, "step": 194740},
+    {"epoch": 0.97375, "grad_norm": 0.625, "learning_rate": 7.914572864321608e-05, "loss": 2.0695, "step": 194750},
+    {"epoch": 0.9738, "grad_norm": 0.671875, "learning_rate": 7.899497487437186e-05, "loss": 2.0564, "step": 194760},
+    {"epoch": 0.97385, "grad_norm": 0.5390625, "learning_rate": 7.884422110552764e-05, "loss": 2.1664, "step": 194770},
+    {"epoch": 0.9739, "grad_norm": 0.6484375, "learning_rate": 7.869346733668342e-05, "loss": 2.0693, "step": 194780},
+    {"epoch": 0.97395, "grad_norm": 0.69140625, "learning_rate": 7.85427135678392e-05, "loss": 2.0349, "step": 194790},
+    {"epoch": 0.974, "grad_norm": 0.640625, "learning_rate": 7.839195979899498e-05, "loss": 2.0468, "step": 194800},
+    {"epoch": 0.97405, "grad_norm": 0.69921875, "learning_rate": 7.824120603015076e-05, "loss": 2.1135, "step": 194810},
+    {"epoch": 0.9741, "grad_norm": 0.546875, "learning_rate": 7.809045226130654e-05, "loss": 2.1618, "step": 194820},
+    {"epoch": 0.97415, "grad_norm": 0.61328125, "learning_rate": 7.793969849246232e-05, "loss": 2.06, "step": 194830},
+    {"epoch": 0.9742, "grad_norm": 0.609375, "learning_rate": 7.77889447236181e-05, "loss": 2.1369, "step": 194840},
+    {"epoch": 0.97425, "grad_norm": 0.5703125, "learning_rate": 7.763819095477388e-05, "loss": 2.1368, "step": 194850},
+    {"epoch": 0.9743, "grad_norm": 0.59375, "learning_rate": 7.748743718592966e-05, "loss": 2.0634, "step": 194860},
+    {"epoch": 0.97435, "grad_norm": 0.58984375, "learning_rate": 7.733668341708543e-05, "loss": 2.075, "step": 194870},
+    {"epoch": 0.9744, "grad_norm": 0.546875, "learning_rate": 7.718592964824121e-05, "loss": 2.0687, "step": 194880},
+    {"epoch": 0.97445, "grad_norm": 0.6484375, "learning_rate": 7.7035175879397e-05, "loss": 2.0964, "step": 194890},
+    {"epoch": 0.9745, "grad_norm": 0.58203125, "learning_rate": 7.688442211055277e-05, "loss": 2.0518, "step": 194900},
+    {"epoch": 0.97455, "grad_norm": 0.59765625, "learning_rate": 7.673366834170854e-05, "loss": 2.1192, "step": 194910},
+    {"epoch": 0.9746, "grad_norm": 0.62109375, "learning_rate": 7.658291457286432e-05, "loss": 2.0555, "step": 194920},
+    {"epoch": 0.97465, "grad_norm": 0.59765625, "learning_rate": 7.64321608040201e-05, "loss": 2.1182, "step": 194930},
+    {"epoch": 0.9747, "grad_norm": 0.6171875, "learning_rate": 7.628140703517588e-05, "loss": 2.0265, "step": 194940},
+    {"epoch": 0.97475, "grad_norm": 0.58203125, "learning_rate": 7.613065326633166e-05, "loss": 2.1029, "step": 194950},
+    {"epoch": 0.9748, "grad_norm": 0.63671875, "learning_rate": 7.597989949748744e-05, "loss": 2.0365, "step": 194960},
+    {"epoch": 0.97485, "grad_norm": 0.640625, "learning_rate": 7.582914572864322e-05, "loss": 2.0575, "step": 194970},
+    {"epoch": 0.9749, "grad_norm": 0.5859375, "learning_rate": 7.5678391959799e-05, "loss": 2.09, "step": 194980},
+    {"epoch": 0.97495, "grad_norm": 0.59765625, "learning_rate": 7.552763819095478e-05, "loss": 2.0609, "step": 194990},
+    {"epoch": 0.975, "grad_norm": 0.5703125, "learning_rate": 7.537688442211056e-05, "loss": 2.1511, "step": 195000},
+    {"epoch": 0.97505, "grad_norm": 0.57421875, "learning_rate": 7.522613065326634e-05, "loss": 2.0456, "step": 195010},
+    {"epoch": 0.9751, "grad_norm": 0.57421875, "learning_rate": 7.507537688442212e-05, "loss": 2.0705, "step": 195020},
+    {"epoch": 0.97515, "grad_norm": 0.59765625, "learning_rate": 7.49246231155779e-05, "loss": 2.0374, "step": 195030},
+    {"epoch": 0.9752, "grad_norm": 0.56640625, "learning_rate": 7.477386934673368e-05, "loss": 2.1065, "step": 195040},
+    {"epoch": 0.97525, "grad_norm": 0.58203125, "learning_rate": 7.462311557788946e-05, "loss": 2.0951, "step": 195050},
+    {"epoch": 0.9753, "grad_norm": 0.7265625, "learning_rate": 7.447236180904524e-05, "loss": 2.0473, "step": 195060},
+    {"epoch": 0.97535, "grad_norm": 0.69140625, "learning_rate": 7.432160804020102e-05, "loss": 2.121, "step": 195070},
+    {"epoch": 0.9754, "grad_norm": 0.62890625, "learning_rate": 7.41708542713568e-05, "loss": 2.0394, "step": 195080},
+    {"epoch": 0.97545, "grad_norm": 0.6875, "learning_rate": 7.402010050251256e-05, "loss": 2.094, "step": 195090},
+    {"epoch": 0.9755, "grad_norm": 0.5859375, "learning_rate": 7.386934673366834e-05, "loss": 2.0743, "step": 195100},
+    {"epoch": 0.97555, "grad_norm": 0.58984375, "learning_rate": 7.371859296482412e-05, "loss": 2.0448, "step": 195110},
+    {"epoch": 0.9756, "grad_norm": 0.59765625, "learning_rate": 7.35678391959799e-05, "loss": 2.1157, "step": 195120},
+    {"epoch": 0.97565, "grad_norm": 0.625, "learning_rate": 7.341708542713568e-05, "loss": 2.0819, "step": 195130},
+    {"epoch": 0.9757, "grad_norm": 0.5625, "learning_rate": 7.326633165829146e-05, "loss": 2.0982, "step": 195140},
+    {"epoch": 0.97575, "grad_norm": 0.72265625, "learning_rate": 7.311557788944724e-05, "loss": 1.9995, "step": 195150},
+    {"epoch": 0.9758, "grad_norm": 0.63671875, "learning_rate": 7.2964824120603e-05, "loss": 2.0914, "step": 195160},
+    {"epoch": 0.97585, "grad_norm": 0.58203125, "learning_rate": 7.281407035175879e-05, "loss": 2.1213, "step": 195170},
+    {"epoch": 0.9759, "grad_norm": 0.5625, "learning_rate": 7.266331658291457e-05, "loss": 2.0208, "step": 195180},
+    {"epoch": 0.97595, "grad_norm": 0.5859375, "learning_rate": 7.251256281407035e-05, "loss": 2.1003, "step": 195190},
+    {"epoch": 0.976, "grad_norm": 0.7578125, "learning_rate": 7.236180904522613e-05, "loss": 2.0308, "step": 195200},
+    {"epoch": 0.97605, "grad_norm": 0.671875, "learning_rate": 7.22110552763819e-05, "loss": 2.107, "step": 195210},
+    {"epoch": 0.9761, "grad_norm": 0.62890625, "learning_rate": 7.206030150753768e-05, "loss": 2.0508, "step": 195220},
+    {"epoch": 0.97615, "grad_norm": 0.58984375, "learning_rate": 7.190954773869346e-05, "loss": 2.089, "step": 195230},
+    {"epoch": 0.9762, "grad_norm": 0.6171875, "learning_rate": 7.175879396984924e-05, "loss": 2.0705, "step": 195240},
+    {"epoch": 0.97625, "grad_norm": 0.6328125, "learning_rate": 7.160804020100502e-05, "loss": 2.0752, "step": 195250},
+    {"epoch": 0.9763, "grad_norm": 0.625, "learning_rate": 7.14572864321608e-05, "loss": 2.0629, "step": 195260},
+    {"epoch": 0.97635, "grad_norm": 0.71484375, "learning_rate": 7.130653266331658e-05, "loss": 2.044, "step": 195270},
+    {"epoch": 0.9764, "grad_norm": 0.625, "learning_rate": 7.115577889447236e-05, "loss": 2.1158, "step": 195280},
+    {"epoch": 0.97645, "grad_norm": 0.734375, "learning_rate": 7.100502512562814e-05, "loss": 2.0761, "step": 195290},
+    {"epoch": 0.9765, "grad_norm": 0.60546875, "learning_rate": 7.085427135678392e-05, "loss": 2.0954, "step": 195300},
+    {"epoch": 0.97655, "grad_norm": 0.6171875, "learning_rate": 7.07035175879397e-05, "loss": 2.0375, "step": 195310},
+    {"epoch": 0.9766, "grad_norm": 0.65625, "learning_rate": 7.055276381909548e-05, "loss": 2.0542, "step": 195320},
+    {"epoch": 0.97665, "grad_norm": 0.59375, "learning_rate": 7.040201005025126e-05, "loss": 2.0643, "step": 195330},
+    {"epoch": 0.9767, "grad_norm": 0.69140625, "learning_rate": 7.025125628140704e-05, "loss": 2.0963, "step": 195340},
+    {"epoch": 0.97675, "grad_norm": 0.7421875, "learning_rate": 7.010050251256282e-05, "loss": 2.0833, "step": 195350},
+    {"epoch": 0.9768, "grad_norm": 0.62890625, "learning_rate": 6.99497487437186e-05, "loss": 2.0813, "step": 195360},
+    {"epoch": 0.97685, "grad_norm": 0.59375, "learning_rate": 6.979899497487437e-05, "loss": 2.0895, "step": 195370},
+    {"epoch": 0.9769, "grad_norm": 0.7109375, "learning_rate": 6.964824120603015e-05, "loss": 2.0825, "step": 195380},
+    {"epoch": 0.97695, "grad_norm": 0.5625, "learning_rate": 6.949748743718593e-05, "loss": 2.0591, "step": 195390},
+    {"epoch": 0.977, "grad_norm": 0.5859375, "learning_rate": 6.93467336683417e-05, "loss": 2.0933, "step": 195400},
+    {"epoch": 0.97705, "grad_norm": 0.6484375, "learning_rate": 6.919597989949749e-05, "loss": 2.0985, "step": 195410},
+    {"epoch": 0.9771, "grad_norm": 0.58984375, "learning_rate": 6.904522613065327e-05, "loss": 2.1114, "step": 195420},
+    {"epoch": 0.97715, "grad_norm": 0.703125, "learning_rate": 6.889447236180905e-05, "loss": 2.0591, "step": 195430},
+    {"epoch": 0.9772, "grad_norm": 0.6015625, "learning_rate": 6.874371859296482e-05, "loss": 2.1221, "step": 195440},
+    {"epoch": 0.97725, "grad_norm": 0.640625, "learning_rate": 6.85929648241206e-05, "loss": 2.0875, "step": 195450},
+    {"epoch": 0.9773, "grad_norm": 0.59765625, "learning_rate": 6.844221105527638e-05, "loss": 2.0736, "step": 195460},
+    {"epoch": 0.97735, "grad_norm": 0.61328125, "learning_rate": 6.829145728643216e-05, "loss": 2.115, "step": 195470},
+    {"epoch": 0.9774, "grad_norm": 0.63671875, "learning_rate": 6.814070351758794e-05, "loss": 2.056, "step": 195480},
+    {"epoch": 0.97745, "grad_norm": 0.5546875, "learning_rate": 6.798994974874372e-05, "loss": 2.1237, "step": 195490},
+    {"epoch": 0.9775, "grad_norm": 0.62890625, "learning_rate": 6.78391959798995e-05, "loss": 2.0352, "step": 195500},
+    {"epoch": 0.97755, "grad_norm": 0.69921875, "learning_rate": 6.768844221105528e-05, "loss": 2.1308, "step": 195510},
+    {"epoch": 0.9776, "grad_norm": 0.62109375, "learning_rate": 6.753768844221106e-05, "loss": 2.0726, "step": 195520},
+    {"epoch": 0.97765, "grad_norm": 0.6171875, "learning_rate": 6.738693467336684e-05, "loss": 2.0971, "step": 195530},
+    {"epoch": 0.9777, "grad_norm": 0.66015625, "learning_rate": 6.723618090452262e-05, "loss": 2.074, "step": 195540},
+    {"epoch": 0.97775, "grad_norm": 0.60546875, "learning_rate": 6.70854271356784e-05, "loss": 2.0329, "step": 195550},
+    {"epoch": 0.9778, "grad_norm": 0.63671875, "learning_rate": 6.693467336683418e-05, "loss": 2.1586, "step": 195560},
+    {"epoch": 0.97785, "grad_norm": 0.5859375, "learning_rate": 6.678391959798996e-05, "loss": 2.0702, "step": 195570},
+    {"epoch": 0.9779, "grad_norm": 0.6015625, "learning_rate": 6.663316582914573e-05, "loss": 2.1121, "step": 195580},
+    {"epoch": 0.97795, "grad_norm": 0.625, "learning_rate": 6.648241206030151e-05, "loss": 2.0714, "step": 195590},
+    {"epoch": 0.978, "grad_norm": 0.640625, "learning_rate": 6.633165829145729e-05, "loss": 2.119, "step": 195600},
+    {"epoch": 0.97805, "grad_norm": 0.671875, "learning_rate": 6.618090452261307e-05, "loss": 2.0327, "step": 195610},
+    {"epoch": 0.9781, "grad_norm": 0.5859375, "learning_rate": 6.603015075376885e-05, "loss": 2.1342, "step": 195620},
+    {"epoch": 0.97815, "grad_norm": 0.578125, "learning_rate": 6.587939698492463e-05, "loss": 2.0827, "step": 195630},
+    {"epoch": 0.9782, "grad_norm": 0.875, "learning_rate": 6.57286432160804e-05, "loss": 2.0745, "step": 195640},
+    {"epoch": 0.97825, "grad_norm": 0.56640625, "learning_rate": 6.557788944723619e-05, "loss": 2.0922, "step": 195650},
+    {"epoch": 0.9783, "grad_norm": 0.625, "learning_rate": 6.542713567839197e-05, "loss": 2.05, "step": 195660},
+    {"epoch": 0.97835, "grad_norm": 0.5703125, "learning_rate": 6.527638190954773e-05, "loss": 2.0529, "step": 195670},
+    {"epoch": 0.9784, "grad_norm": 0.62890625, "learning_rate": 6.512562814070351e-05, "loss": 2.1066, "step": 195680},
+    {"epoch": 0.97845, "grad_norm": 0.57421875, "learning_rate": 6.497487437185929e-05, "loss": 2.1088, "step": 195690},
+    {"epoch": 0.9785, "grad_norm": 0.62109375, "learning_rate": 6.482412060301507e-05, "loss": 2.0692, "step": 195700},
+    {"epoch": 0.97855, "grad_norm": 0.6640625, "learning_rate": 6.467336683417085e-05, "loss": 2.0633, "step": 195710},
+    {"epoch": 0.9786, "grad_norm": 0.53125, "learning_rate": 6.452261306532663e-05, "loss": 2.0881, "step": 195720},
+    {"epoch": 0.97865, "grad_norm": 0.6484375, "learning_rate": 6.437185929648241e-05, "loss": 2.0878, "step": 195730},
+    {"epoch": 0.9787, "grad_norm": 0.6484375, "learning_rate": 6.422110552763819e-05, "loss": 2.0973, "step": 195740},
+    {"epoch": 0.97875, "grad_norm": 0.60546875, "learning_rate": 6.407035175879397e-05, "loss": 2.0905, "step": 195750},
+    {"epoch": 0.9788, "grad_norm": 0.55859375, "learning_rate": 6.391959798994975e-05, "loss": 2.0879, "step": 195760},
+    {"epoch": 0.97885, "grad_norm": 0.63671875, "learning_rate": 6.376884422110553e-05, "loss": 2.0914, "step": 195770},
+    {"epoch": 0.9789, "grad_norm": 0.65625, "learning_rate": 6.361809045226131e-05, "loss": 2.0636, "step": 195780},
+    {"epoch": 0.97895, "grad_norm": 0.578125, "learning_rate": 6.346733668341709e-05, "loss": 2.1091, "step": 195790},
+    {"epoch": 0.979, "grad_norm": 0.58984375, "learning_rate": 6.331658291457287e-05, "loss": 2.0525, "step": 195800},
+    {"epoch": 0.97905, "grad_norm": 0.5859375, "learning_rate": 6.316582914572865e-05, "loss": 2.0604, "step": 195810},
+    {"epoch": 0.9791, "grad_norm": 0.609375, "learning_rate": 6.301507537688443e-05, "loss": 2.0758, "step": 195820},
+    {"epoch": 0.97915, "grad_norm": 0.61328125, "learning_rate": 6.28643216080402e-05, "loss": 2.0839, "step": 195830},
+    {"epoch": 0.9792, "grad_norm": 0.58203125, "learning_rate": 6.271356783919597e-05, "loss": 2.1228, "step": 195840},
+    {"epoch": 0.97925, "grad_norm": 0.6171875, "learning_rate": 6.256281407035175e-05, "loss": 2.0687, "step": 195850},
+    {"epoch": 0.9793, "grad_norm": 0.671875, "learning_rate": 6.241206030150753e-05, "loss": 2.0924, "step": 195860},
+    {"epoch": 0.97935, "grad_norm": 0.67578125, "learning_rate": 6.226130653266331e-05, "loss": 2.0714, "step": 195870},
+    {"epoch": 0.9794, "grad_norm": 0.5625, "learning_rate": 6.211055276381909e-05, "loss": 2.0957, "step": 195880},
+    {"epoch": 0.97945, "grad_norm": 0.609375, "learning_rate": 6.195979899497487e-05, "loss": 2.0787, "step": 195890},
+    {"epoch": 0.9795, "grad_norm": 0.625, "learning_rate": 6.180904522613065e-05, "loss": 2.053, "step": 195900},
+    {"epoch": 0.97955, "grad_norm": 0.6796875, "learning_rate": 6.165829145728643e-05, "loss": 2.1153, "step": 195910},
+    {"epoch": 0.9796, "grad_norm": 0.65234375, "learning_rate": 6.150753768844221e-05, "loss": 2.1045, "step": 195920},
+    {"epoch": 0.97965, "grad_norm": 0.6640625, "learning_rate": 6.135678391959799e-05, "loss": 2.1306, "step": 195930},
+    {"epoch": 0.9797, "grad_norm": 0.63671875, "learning_rate": 6.120603015075377e-05, "loss": 2.0628, "step": 195940},
+    {"epoch": 0.97975, "grad_norm": 0.62109375, "learning_rate": 6.105527638190955e-05, "loss": 2.1002, "step": 195950},
+    {"epoch": 0.9798, "grad_norm": 0.58984375, "learning_rate": 6.090452261306533e-05, "loss": 2.024, "step": 195960},
+    {"epoch": 0.97985, "grad_norm": 0.63671875, "learning_rate": 6.075376884422111e-05, "loss": 2.0726, "step": 195970},
+    {"epoch": 0.9799, "grad_norm": 0.66015625, "learning_rate": 6.060301507537689e-05, "loss": 2.121, "step": 195980},
+    {"epoch": 0.97995, "grad_norm": 0.60546875, "learning_rate": 6.045226130653266e-05, "loss": 2.1008, "step": 195990},
+    {"epoch": 0.98, "grad_norm": 0.59765625, "learning_rate": 6.030150753768844e-05, "loss": 2.0839, "step": 196000},
+    {
+      "epoch": 0.98,
+      "eval_loss": 2.0759713649749756,
+      "eval_runtime": 47.6126,
+      "eval_samples_per_second": 52.507,
+      "eval_steps_per_second": 0.105,
+      "step": 196000
     }
   ],
   "logging_steps": 10,
@@ -136602,7 +138010,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.183602659898163e+18,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null
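Everything this commit adds to trainer_state.json sits in its log_history array: one training record every 10 steps (matching "logging_steps": 10) plus one eval record at step 196000 (matching "eval_steps": 2000). A minimal sketch for inspecting it, assuming the checkpoint directory has been downloaded locally; the linear-schedule check at the end is our observation from the logged values, not anything stated in the commit:

import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Split the records this diff appended: training steps vs. eval points.
train = [r for r in state["log_history"] if "loss" in r]
evals = [r for r in state["log_history"] if "eval_loss" in r]
print(train[-1])  # step 196000, loss 2.0839
print(evals[-1])  # step 196000, eval_loss 2.0759713649749756

# The logged learning rate falls on a straight line in step; extrapolating the
# last two records, it reaches zero at step 200000, consistent with linear decay.
a, b = train[-2], train[-1]
slope = (b["learning_rate"] - a["learning_rate"]) / (b["step"] - a["step"])
print(b["step"] - b["learning_rate"] / slope)  # ≈ 200000.0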