nadahlberg
commited on
Commit
•
c8b4fc0
1
Parent(s):
9f35832
Training in progress, step 194000, checkpoint
Browse files- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +1411 -3
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 325690872
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a89c437e50b0f8ca4fd9af67e3083993a3fdc03fe2a27a76b7b04ce1d97eabf5
|
3 |
size 325690872
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 651550778
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97b0a64aecfff2c26bdd5120e954a1d5857e9b29b7d91dd33d19b696968af0e1
|
3 |
size 651550778
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e074e22eba76ab6e9b544774e027528ef7c10076e885ecdf1f0ce9d0a4ffb058
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:522379038ff9f72eee4f16fc13c7c527fd89328d593b292e47a71417cbf520bb
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73b398f0dddef58a9282d66a49fd8e220898de17f6d838d7ffa52e9fffff8e6e
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8dcd63f04d0d7f5e8660b206f8c68db860f40c06e41516d296fd35ca06bcd87f
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afe9ba037bd933543b4aeb0ca749e02c10800870c577fe4d6537ec92694a8a38
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfd5271ec811e7349df93e4abc5b00c0534fac3f75454b6fc73f3a9b47df954c
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07489b2fd53c594e60aa0037945803fe0cdb07239fa8fae63abd5b6720b1edf9
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4819284f798dc8aad3d5a2e6f2bfc72c0dffbf7156d8c82abd86685ea2d9c9df
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81927fcd203392c94aa10d8605dd1de0e335a27d0e6da83feec012c338917f03
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 2000,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -135175,6 +135175,1414 @@
|
|
135175 |
"eval_samples_per_second": 55.2,
|
135176 |
"eval_steps_per_second": 0.11,
|
135177 |
"step": 192000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135178 |
}
|
135179 |
],
|
135180 |
"logging_steps": 10,
|
@@ -135194,7 +136602,7 @@
|
|
135194 |
"attributes": {}
|
135195 |
}
|
135196 |
},
|
135197 |
-
"total_flos": 5.
|
135198 |
"train_batch_size": 64,
|
135199 |
"trial_name": null,
|
135200 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.97,
|
5 |
"eval_steps": 2000,
|
6 |
+
"global_step": 194000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
135175 |
"eval_samples_per_second": 55.2,
|
135176 |
"eval_steps_per_second": 0.11,
|
135177 |
"step": 192000
|
135178 |
+
},
|
135179 |
+
{
|
135180 |
+
"epoch": 0.96005,
|
135181 |
+
"grad_norm": 0.76171875,
|
135182 |
+
"learning_rate": 0.00012045226130653266,
|
135183 |
+
"loss": 2.0404,
|
135184 |
+
"step": 192010
|
135185 |
+
},
|
135186 |
+
{
|
135187 |
+
"epoch": 0.9601,
|
135188 |
+
"grad_norm": 0.5703125,
|
135189 |
+
"learning_rate": 0.00012030150753768844,
|
135190 |
+
"loss": 2.1006,
|
135191 |
+
"step": 192020
|
135192 |
+
},
|
135193 |
+
{
|
135194 |
+
"epoch": 0.96015,
|
135195 |
+
"grad_norm": 0.61328125,
|
135196 |
+
"learning_rate": 0.00012015075376884422,
|
135197 |
+
"loss": 2.0222,
|
135198 |
+
"step": 192030
|
135199 |
+
},
|
135200 |
+
{
|
135201 |
+
"epoch": 0.9602,
|
135202 |
+
"grad_norm": 0.671875,
|
135203 |
+
"learning_rate": 0.00012,
|
135204 |
+
"loss": 2.1103,
|
135205 |
+
"step": 192040
|
135206 |
+
},
|
135207 |
+
{
|
135208 |
+
"epoch": 0.96025,
|
135209 |
+
"grad_norm": 0.5703125,
|
135210 |
+
"learning_rate": 0.00011984924623115578,
|
135211 |
+
"loss": 2.0192,
|
135212 |
+
"step": 192050
|
135213 |
+
},
|
135214 |
+
{
|
135215 |
+
"epoch": 0.9603,
|
135216 |
+
"grad_norm": 0.609375,
|
135217 |
+
"learning_rate": 0.00011969849246231156,
|
135218 |
+
"loss": 2.0935,
|
135219 |
+
"step": 192060
|
135220 |
+
},
|
135221 |
+
{
|
135222 |
+
"epoch": 0.96035,
|
135223 |
+
"grad_norm": 0.57421875,
|
135224 |
+
"learning_rate": 0.00011954773869346734,
|
135225 |
+
"loss": 2.0174,
|
135226 |
+
"step": 192070
|
135227 |
+
},
|
135228 |
+
{
|
135229 |
+
"epoch": 0.9604,
|
135230 |
+
"grad_norm": 0.57421875,
|
135231 |
+
"learning_rate": 0.00011939698492462312,
|
135232 |
+
"loss": 2.0971,
|
135233 |
+
"step": 192080
|
135234 |
+
},
|
135235 |
+
{
|
135236 |
+
"epoch": 0.96045,
|
135237 |
+
"grad_norm": 0.6640625,
|
135238 |
+
"learning_rate": 0.0001192462311557789,
|
135239 |
+
"loss": 2.0244,
|
135240 |
+
"step": 192090
|
135241 |
+
},
|
135242 |
+
{
|
135243 |
+
"epoch": 0.9605,
|
135244 |
+
"grad_norm": 0.57421875,
|
135245 |
+
"learning_rate": 0.00011909547738693468,
|
135246 |
+
"loss": 2.0763,
|
135247 |
+
"step": 192100
|
135248 |
+
},
|
135249 |
+
{
|
135250 |
+
"epoch": 0.96055,
|
135251 |
+
"grad_norm": 0.57421875,
|
135252 |
+
"learning_rate": 0.00011894472361809046,
|
135253 |
+
"loss": 2.0698,
|
135254 |
+
"step": 192110
|
135255 |
+
},
|
135256 |
+
{
|
135257 |
+
"epoch": 0.9606,
|
135258 |
+
"grad_norm": 0.62109375,
|
135259 |
+
"learning_rate": 0.00011879396984924624,
|
135260 |
+
"loss": 2.0831,
|
135261 |
+
"step": 192120
|
135262 |
+
},
|
135263 |
+
{
|
135264 |
+
"epoch": 0.96065,
|
135265 |
+
"grad_norm": 0.6171875,
|
135266 |
+
"learning_rate": 0.00011864321608040202,
|
135267 |
+
"loss": 2.0726,
|
135268 |
+
"step": 192130
|
135269 |
+
},
|
135270 |
+
{
|
135271 |
+
"epoch": 0.9607,
|
135272 |
+
"grad_norm": 0.625,
|
135273 |
+
"learning_rate": 0.0001184924623115578,
|
135274 |
+
"loss": 2.0523,
|
135275 |
+
"step": 192140
|
135276 |
+
},
|
135277 |
+
{
|
135278 |
+
"epoch": 0.96075,
|
135279 |
+
"grad_norm": 0.63671875,
|
135280 |
+
"learning_rate": 0.00011834170854271358,
|
135281 |
+
"loss": 2.1123,
|
135282 |
+
"step": 192150
|
135283 |
+
},
|
135284 |
+
{
|
135285 |
+
"epoch": 0.9608,
|
135286 |
+
"grad_norm": 0.640625,
|
135287 |
+
"learning_rate": 0.00011819095477386936,
|
135288 |
+
"loss": 2.0639,
|
135289 |
+
"step": 192160
|
135290 |
+
},
|
135291 |
+
{
|
135292 |
+
"epoch": 0.96085,
|
135293 |
+
"grad_norm": 0.6015625,
|
135294 |
+
"learning_rate": 0.00011804020100502514,
|
135295 |
+
"loss": 2.0743,
|
135296 |
+
"step": 192170
|
135297 |
+
},
|
135298 |
+
{
|
135299 |
+
"epoch": 0.9609,
|
135300 |
+
"grad_norm": 0.6328125,
|
135301 |
+
"learning_rate": 0.0001178894472361809,
|
135302 |
+
"loss": 2.0775,
|
135303 |
+
"step": 192180
|
135304 |
+
},
|
135305 |
+
{
|
135306 |
+
"epoch": 0.96095,
|
135307 |
+
"grad_norm": 0.546875,
|
135308 |
+
"learning_rate": 0.00011773869346733669,
|
135309 |
+
"loss": 2.1002,
|
135310 |
+
"step": 192190
|
135311 |
+
},
|
135312 |
+
{
|
135313 |
+
"epoch": 0.961,
|
135314 |
+
"grad_norm": 0.6015625,
|
135315 |
+
"learning_rate": 0.00011758793969849247,
|
135316 |
+
"loss": 2.0077,
|
135317 |
+
"step": 192200
|
135318 |
+
},
|
135319 |
+
{
|
135320 |
+
"epoch": 0.96105,
|
135321 |
+
"grad_norm": 0.6171875,
|
135322 |
+
"learning_rate": 0.00011743718592964824,
|
135323 |
+
"loss": 2.053,
|
135324 |
+
"step": 192210
|
135325 |
+
},
|
135326 |
+
{
|
135327 |
+
"epoch": 0.9611,
|
135328 |
+
"grad_norm": 0.62890625,
|
135329 |
+
"learning_rate": 0.00011728643216080402,
|
135330 |
+
"loss": 2.0919,
|
135331 |
+
"step": 192220
|
135332 |
+
},
|
135333 |
+
{
|
135334 |
+
"epoch": 0.96115,
|
135335 |
+
"grad_norm": 0.56640625,
|
135336 |
+
"learning_rate": 0.0001171356783919598,
|
135337 |
+
"loss": 2.025,
|
135338 |
+
"step": 192230
|
135339 |
+
},
|
135340 |
+
{
|
135341 |
+
"epoch": 0.9612,
|
135342 |
+
"grad_norm": 0.62890625,
|
135343 |
+
"learning_rate": 0.00011698492462311558,
|
135344 |
+
"loss": 2.1223,
|
135345 |
+
"step": 192240
|
135346 |
+
},
|
135347 |
+
{
|
135348 |
+
"epoch": 0.96125,
|
135349 |
+
"grad_norm": 0.69140625,
|
135350 |
+
"learning_rate": 0.00011683417085427136,
|
135351 |
+
"loss": 2.0455,
|
135352 |
+
"step": 192250
|
135353 |
+
},
|
135354 |
+
{
|
135355 |
+
"epoch": 0.9613,
|
135356 |
+
"grad_norm": 0.69140625,
|
135357 |
+
"learning_rate": 0.00011668341708542714,
|
135358 |
+
"loss": 2.0944,
|
135359 |
+
"step": 192260
|
135360 |
+
},
|
135361 |
+
{
|
135362 |
+
"epoch": 0.96135,
|
135363 |
+
"grad_norm": 0.7265625,
|
135364 |
+
"learning_rate": 0.00011653266331658292,
|
135365 |
+
"loss": 2.0621,
|
135366 |
+
"step": 192270
|
135367 |
+
},
|
135368 |
+
{
|
135369 |
+
"epoch": 0.9614,
|
135370 |
+
"grad_norm": 0.61328125,
|
135371 |
+
"learning_rate": 0.0001163819095477387,
|
135372 |
+
"loss": 2.0937,
|
135373 |
+
"step": 192280
|
135374 |
+
},
|
135375 |
+
{
|
135376 |
+
"epoch": 0.96145,
|
135377 |
+
"grad_norm": 0.578125,
|
135378 |
+
"learning_rate": 0.00011623115577889448,
|
135379 |
+
"loss": 2.0469,
|
135380 |
+
"step": 192290
|
135381 |
+
},
|
135382 |
+
{
|
135383 |
+
"epoch": 0.9615,
|
135384 |
+
"grad_norm": 0.65234375,
|
135385 |
+
"learning_rate": 0.00011608040201005026,
|
135386 |
+
"loss": 2.0901,
|
135387 |
+
"step": 192300
|
135388 |
+
},
|
135389 |
+
{
|
135390 |
+
"epoch": 0.96155,
|
135391 |
+
"grad_norm": 0.60546875,
|
135392 |
+
"learning_rate": 0.00011592964824120604,
|
135393 |
+
"loss": 2.0236,
|
135394 |
+
"step": 192310
|
135395 |
+
},
|
135396 |
+
{
|
135397 |
+
"epoch": 0.9616,
|
135398 |
+
"grad_norm": 0.5703125,
|
135399 |
+
"learning_rate": 0.00011577889447236182,
|
135400 |
+
"loss": 2.0918,
|
135401 |
+
"step": 192320
|
135402 |
+
},
|
135403 |
+
{
|
135404 |
+
"epoch": 0.96165,
|
135405 |
+
"grad_norm": 0.58203125,
|
135406 |
+
"learning_rate": 0.0001156281407035176,
|
135407 |
+
"loss": 2.0807,
|
135408 |
+
"step": 192330
|
135409 |
+
},
|
135410 |
+
{
|
135411 |
+
"epoch": 0.9617,
|
135412 |
+
"grad_norm": 0.58984375,
|
135413 |
+
"learning_rate": 0.00011547738693467338,
|
135414 |
+
"loss": 2.0503,
|
135415 |
+
"step": 192340
|
135416 |
+
},
|
135417 |
+
{
|
135418 |
+
"epoch": 0.96175,
|
135419 |
+
"grad_norm": 0.59765625,
|
135420 |
+
"learning_rate": 0.00011532663316582916,
|
135421 |
+
"loss": 2.1287,
|
135422 |
+
"step": 192350
|
135423 |
+
},
|
135424 |
+
{
|
135425 |
+
"epoch": 0.9618,
|
135426 |
+
"grad_norm": 0.63671875,
|
135427 |
+
"learning_rate": 0.00011517587939698494,
|
135428 |
+
"loss": 2.1066,
|
135429 |
+
"step": 192360
|
135430 |
+
},
|
135431 |
+
{
|
135432 |
+
"epoch": 0.96185,
|
135433 |
+
"grad_norm": 0.63671875,
|
135434 |
+
"learning_rate": 0.00011502512562814072,
|
135435 |
+
"loss": 2.1153,
|
135436 |
+
"step": 192370
|
135437 |
+
},
|
135438 |
+
{
|
135439 |
+
"epoch": 0.9619,
|
135440 |
+
"grad_norm": 0.609375,
|
135441 |
+
"learning_rate": 0.0001148743718592965,
|
135442 |
+
"loss": 2.0449,
|
135443 |
+
"step": 192380
|
135444 |
+
},
|
135445 |
+
{
|
135446 |
+
"epoch": 0.96195,
|
135447 |
+
"grad_norm": 0.5859375,
|
135448 |
+
"learning_rate": 0.00011472361809045227,
|
135449 |
+
"loss": 2.0976,
|
135450 |
+
"step": 192390
|
135451 |
+
},
|
135452 |
+
{
|
135453 |
+
"epoch": 0.962,
|
135454 |
+
"grad_norm": 0.6484375,
|
135455 |
+
"learning_rate": 0.00011457286432160805,
|
135456 |
+
"loss": 2.0822,
|
135457 |
+
"step": 192400
|
135458 |
+
},
|
135459 |
+
{
|
135460 |
+
"epoch": 0.96205,
|
135461 |
+
"grad_norm": 0.6171875,
|
135462 |
+
"learning_rate": 0.00011442211055276383,
|
135463 |
+
"loss": 2.0671,
|
135464 |
+
"step": 192410
|
135465 |
+
},
|
135466 |
+
{
|
135467 |
+
"epoch": 0.9621,
|
135468 |
+
"grad_norm": 0.60546875,
|
135469 |
+
"learning_rate": 0.0001142713567839196,
|
135470 |
+
"loss": 2.0809,
|
135471 |
+
"step": 192420
|
135472 |
+
},
|
135473 |
+
{
|
135474 |
+
"epoch": 0.96215,
|
135475 |
+
"grad_norm": 0.5703125,
|
135476 |
+
"learning_rate": 0.00011412060301507539,
|
135477 |
+
"loss": 2.0569,
|
135478 |
+
"step": 192430
|
135479 |
+
},
|
135480 |
+
{
|
135481 |
+
"epoch": 0.9622,
|
135482 |
+
"grad_norm": 0.671875,
|
135483 |
+
"learning_rate": 0.00011396984924623116,
|
135484 |
+
"loss": 2.085,
|
135485 |
+
"step": 192440
|
135486 |
+
},
|
135487 |
+
{
|
135488 |
+
"epoch": 0.96225,
|
135489 |
+
"grad_norm": 0.59375,
|
135490 |
+
"learning_rate": 0.00011381909547738694,
|
135491 |
+
"loss": 2.0742,
|
135492 |
+
"step": 192450
|
135493 |
+
},
|
135494 |
+
{
|
135495 |
+
"epoch": 0.9623,
|
135496 |
+
"grad_norm": 0.59765625,
|
135497 |
+
"learning_rate": 0.00011366834170854272,
|
135498 |
+
"loss": 2.0884,
|
135499 |
+
"step": 192460
|
135500 |
+
},
|
135501 |
+
{
|
135502 |
+
"epoch": 0.96235,
|
135503 |
+
"grad_norm": 0.625,
|
135504 |
+
"learning_rate": 0.0001135175879396985,
|
135505 |
+
"loss": 2.0131,
|
135506 |
+
"step": 192470
|
135507 |
+
},
|
135508 |
+
{
|
135509 |
+
"epoch": 0.9624,
|
135510 |
+
"grad_norm": 0.640625,
|
135511 |
+
"learning_rate": 0.00011336683417085426,
|
135512 |
+
"loss": 2.081,
|
135513 |
+
"step": 192480
|
135514 |
+
},
|
135515 |
+
{
|
135516 |
+
"epoch": 0.96245,
|
135517 |
+
"grad_norm": 0.546875,
|
135518 |
+
"learning_rate": 0.00011321608040201004,
|
135519 |
+
"loss": 2.0745,
|
135520 |
+
"step": 192490
|
135521 |
+
},
|
135522 |
+
{
|
135523 |
+
"epoch": 0.9625,
|
135524 |
+
"grad_norm": 0.5546875,
|
135525 |
+
"learning_rate": 0.00011306532663316582,
|
135526 |
+
"loss": 2.079,
|
135527 |
+
"step": 192500
|
135528 |
+
},
|
135529 |
+
{
|
135530 |
+
"epoch": 0.96255,
|
135531 |
+
"grad_norm": 0.71875,
|
135532 |
+
"learning_rate": 0.0001129145728643216,
|
135533 |
+
"loss": 2.0357,
|
135534 |
+
"step": 192510
|
135535 |
+
},
|
135536 |
+
{
|
135537 |
+
"epoch": 0.9626,
|
135538 |
+
"grad_norm": 0.62890625,
|
135539 |
+
"learning_rate": 0.00011276381909547738,
|
135540 |
+
"loss": 2.099,
|
135541 |
+
"step": 192520
|
135542 |
+
},
|
135543 |
+
{
|
135544 |
+
"epoch": 0.96265,
|
135545 |
+
"grad_norm": 0.70703125,
|
135546 |
+
"learning_rate": 0.00011261306532663316,
|
135547 |
+
"loss": 2.0364,
|
135548 |
+
"step": 192530
|
135549 |
+
},
|
135550 |
+
{
|
135551 |
+
"epoch": 0.9627,
|
135552 |
+
"grad_norm": 0.60546875,
|
135553 |
+
"learning_rate": 0.00011246231155778894,
|
135554 |
+
"loss": 2.0981,
|
135555 |
+
"step": 192540
|
135556 |
+
},
|
135557 |
+
{
|
135558 |
+
"epoch": 0.96275,
|
135559 |
+
"grad_norm": 0.63671875,
|
135560 |
+
"learning_rate": 0.00011231155778894471,
|
135561 |
+
"loss": 2.0898,
|
135562 |
+
"step": 192550
|
135563 |
+
},
|
135564 |
+
{
|
135565 |
+
"epoch": 0.9628,
|
135566 |
+
"grad_norm": 0.5703125,
|
135567 |
+
"learning_rate": 0.0001121608040201005,
|
135568 |
+
"loss": 2.0789,
|
135569 |
+
"step": 192560
|
135570 |
+
},
|
135571 |
+
{
|
135572 |
+
"epoch": 0.96285,
|
135573 |
+
"grad_norm": 0.59765625,
|
135574 |
+
"learning_rate": 0.00011201005025125627,
|
135575 |
+
"loss": 2.1286,
|
135576 |
+
"step": 192570
|
135577 |
+
},
|
135578 |
+
{
|
135579 |
+
"epoch": 0.9629,
|
135580 |
+
"grad_norm": 0.609375,
|
135581 |
+
"learning_rate": 0.00011185929648241205,
|
135582 |
+
"loss": 2.0632,
|
135583 |
+
"step": 192580
|
135584 |
+
},
|
135585 |
+
{
|
135586 |
+
"epoch": 0.96295,
|
135587 |
+
"grad_norm": 0.55078125,
|
135588 |
+
"learning_rate": 0.00011170854271356783,
|
135589 |
+
"loss": 2.0861,
|
135590 |
+
"step": 192590
|
135591 |
+
},
|
135592 |
+
{
|
135593 |
+
"epoch": 0.963,
|
135594 |
+
"grad_norm": 0.5703125,
|
135595 |
+
"learning_rate": 0.00011155778894472361,
|
135596 |
+
"loss": 2.0272,
|
135597 |
+
"step": 192600
|
135598 |
+
},
|
135599 |
+
{
|
135600 |
+
"epoch": 0.96305,
|
135601 |
+
"grad_norm": 0.66015625,
|
135602 |
+
"learning_rate": 0.00011140703517587939,
|
135603 |
+
"loss": 2.1351,
|
135604 |
+
"step": 192610
|
135605 |
+
},
|
135606 |
+
{
|
135607 |
+
"epoch": 0.9631,
|
135608 |
+
"grad_norm": 0.5703125,
|
135609 |
+
"learning_rate": 0.00011125628140703517,
|
135610 |
+
"loss": 2.1006,
|
135611 |
+
"step": 192620
|
135612 |
+
},
|
135613 |
+
{
|
135614 |
+
"epoch": 0.96315,
|
135615 |
+
"grad_norm": 0.6328125,
|
135616 |
+
"learning_rate": 0.00011110552763819095,
|
135617 |
+
"loss": 2.0411,
|
135618 |
+
"step": 192630
|
135619 |
+
},
|
135620 |
+
{
|
135621 |
+
"epoch": 0.9632,
|
135622 |
+
"grad_norm": 0.6171875,
|
135623 |
+
"learning_rate": 0.00011095477386934673,
|
135624 |
+
"loss": 2.0426,
|
135625 |
+
"step": 192640
|
135626 |
+
},
|
135627 |
+
{
|
135628 |
+
"epoch": 0.96325,
|
135629 |
+
"grad_norm": 0.578125,
|
135630 |
+
"learning_rate": 0.00011080402010050251,
|
135631 |
+
"loss": 2.0275,
|
135632 |
+
"step": 192650
|
135633 |
+
},
|
135634 |
+
{
|
135635 |
+
"epoch": 0.9633,
|
135636 |
+
"grad_norm": 0.59765625,
|
135637 |
+
"learning_rate": 0.00011065326633165829,
|
135638 |
+
"loss": 2.0903,
|
135639 |
+
"step": 192660
|
135640 |
+
},
|
135641 |
+
{
|
135642 |
+
"epoch": 0.96335,
|
135643 |
+
"grad_norm": 0.63671875,
|
135644 |
+
"learning_rate": 0.00011050251256281407,
|
135645 |
+
"loss": 2.0704,
|
135646 |
+
"step": 192670
|
135647 |
+
},
|
135648 |
+
{
|
135649 |
+
"epoch": 0.9634,
|
135650 |
+
"grad_norm": 0.6640625,
|
135651 |
+
"learning_rate": 0.00011035175879396985,
|
135652 |
+
"loss": 2.0627,
|
135653 |
+
"step": 192680
|
135654 |
+
},
|
135655 |
+
{
|
135656 |
+
"epoch": 0.96345,
|
135657 |
+
"grad_norm": 0.6484375,
|
135658 |
+
"learning_rate": 0.00011020100502512562,
|
135659 |
+
"loss": 2.0467,
|
135660 |
+
"step": 192690
|
135661 |
+
},
|
135662 |
+
{
|
135663 |
+
"epoch": 0.9635,
|
135664 |
+
"grad_norm": 0.5859375,
|
135665 |
+
"learning_rate": 0.0001100502512562814,
|
135666 |
+
"loss": 2.134,
|
135667 |
+
"step": 192700
|
135668 |
+
},
|
135669 |
+
{
|
135670 |
+
"epoch": 0.96355,
|
135671 |
+
"grad_norm": 0.65625,
|
135672 |
+
"learning_rate": 0.00010989949748743718,
|
135673 |
+
"loss": 2.0142,
|
135674 |
+
"step": 192710
|
135675 |
+
},
|
135676 |
+
{
|
135677 |
+
"epoch": 0.9636,
|
135678 |
+
"grad_norm": 0.6015625,
|
135679 |
+
"learning_rate": 0.00010974874371859296,
|
135680 |
+
"loss": 2.1523,
|
135681 |
+
"step": 192720
|
135682 |
+
},
|
135683 |
+
{
|
135684 |
+
"epoch": 0.96365,
|
135685 |
+
"grad_norm": 0.60546875,
|
135686 |
+
"learning_rate": 0.00010959798994974874,
|
135687 |
+
"loss": 2.0758,
|
135688 |
+
"step": 192730
|
135689 |
+
},
|
135690 |
+
{
|
135691 |
+
"epoch": 0.9637,
|
135692 |
+
"grad_norm": 0.55859375,
|
135693 |
+
"learning_rate": 0.00010944723618090452,
|
135694 |
+
"loss": 2.0931,
|
135695 |
+
"step": 192740
|
135696 |
+
},
|
135697 |
+
{
|
135698 |
+
"epoch": 0.96375,
|
135699 |
+
"grad_norm": 0.5859375,
|
135700 |
+
"learning_rate": 0.0001092964824120603,
|
135701 |
+
"loss": 2.0748,
|
135702 |
+
"step": 192750
|
135703 |
+
},
|
135704 |
+
{
|
135705 |
+
"epoch": 0.9638,
|
135706 |
+
"grad_norm": 0.6171875,
|
135707 |
+
"learning_rate": 0.00010914572864321608,
|
135708 |
+
"loss": 2.089,
|
135709 |
+
"step": 192760
|
135710 |
+
},
|
135711 |
+
{
|
135712 |
+
"epoch": 0.96385,
|
135713 |
+
"grad_norm": 0.5703125,
|
135714 |
+
"learning_rate": 0.00010899497487437186,
|
135715 |
+
"loss": 2.09,
|
135716 |
+
"step": 192770
|
135717 |
+
},
|
135718 |
+
{
|
135719 |
+
"epoch": 0.9639,
|
135720 |
+
"grad_norm": 0.625,
|
135721 |
+
"learning_rate": 0.00010884422110552763,
|
135722 |
+
"loss": 2.0736,
|
135723 |
+
"step": 192780
|
135724 |
+
},
|
135725 |
+
{
|
135726 |
+
"epoch": 0.96395,
|
135727 |
+
"grad_norm": 0.58203125,
|
135728 |
+
"learning_rate": 0.00010869346733668341,
|
135729 |
+
"loss": 2.0489,
|
135730 |
+
"step": 192790
|
135731 |
+
},
|
135732 |
+
{
|
135733 |
+
"epoch": 0.964,
|
135734 |
+
"grad_norm": 0.64453125,
|
135735 |
+
"learning_rate": 0.0001085427135678392,
|
135736 |
+
"loss": 2.056,
|
135737 |
+
"step": 192800
|
135738 |
+
},
|
135739 |
+
{
|
135740 |
+
"epoch": 0.96405,
|
135741 |
+
"grad_norm": 0.61328125,
|
135742 |
+
"learning_rate": 0.00010839195979899497,
|
135743 |
+
"loss": 2.1303,
|
135744 |
+
"step": 192810
|
135745 |
+
},
|
135746 |
+
{
|
135747 |
+
"epoch": 0.9641,
|
135748 |
+
"grad_norm": 0.53515625,
|
135749 |
+
"learning_rate": 0.00010824120603015075,
|
135750 |
+
"loss": 2.0366,
|
135751 |
+
"step": 192820
|
135752 |
+
},
|
135753 |
+
{
|
135754 |
+
"epoch": 0.96415,
|
135755 |
+
"grad_norm": 0.6640625,
|
135756 |
+
"learning_rate": 0.00010809045226130653,
|
135757 |
+
"loss": 2.1007,
|
135758 |
+
"step": 192830
|
135759 |
+
},
|
135760 |
+
{
|
135761 |
+
"epoch": 0.9642,
|
135762 |
+
"grad_norm": 0.64453125,
|
135763 |
+
"learning_rate": 0.00010793969849246231,
|
135764 |
+
"loss": 2.0316,
|
135765 |
+
"step": 192840
|
135766 |
+
},
|
135767 |
+
{
|
135768 |
+
"epoch": 0.96425,
|
135769 |
+
"grad_norm": 0.55859375,
|
135770 |
+
"learning_rate": 0.00010778894472361809,
|
135771 |
+
"loss": 2.0614,
|
135772 |
+
"step": 192850
|
135773 |
+
},
|
135774 |
+
{
|
135775 |
+
"epoch": 0.9643,
|
135776 |
+
"grad_norm": 0.60546875,
|
135777 |
+
"learning_rate": 0.00010763819095477387,
|
135778 |
+
"loss": 2.087,
|
135779 |
+
"step": 192860
|
135780 |
+
},
|
135781 |
+
{
|
135782 |
+
"epoch": 0.96435,
|
135783 |
+
"grad_norm": 0.62109375,
|
135784 |
+
"learning_rate": 0.00010748743718592965,
|
135785 |
+
"loss": 2.0492,
|
135786 |
+
"step": 192870
|
135787 |
+
},
|
135788 |
+
{
|
135789 |
+
"epoch": 0.9644,
|
135790 |
+
"grad_norm": 0.66796875,
|
135791 |
+
"learning_rate": 0.00010733668341708543,
|
135792 |
+
"loss": 2.103,
|
135793 |
+
"step": 192880
|
135794 |
+
},
|
135795 |
+
{
|
135796 |
+
"epoch": 0.96445,
|
135797 |
+
"grad_norm": 0.62890625,
|
135798 |
+
"learning_rate": 0.00010718592964824121,
|
135799 |
+
"loss": 2.0502,
|
135800 |
+
"step": 192890
|
135801 |
+
},
|
135802 |
+
{
|
135803 |
+
"epoch": 0.9645,
|
135804 |
+
"grad_norm": 0.61328125,
|
135805 |
+
"learning_rate": 0.00010703517587939698,
|
135806 |
+
"loss": 2.0828,
|
135807 |
+
"step": 192900
|
135808 |
+
},
|
135809 |
+
{
|
135810 |
+
"epoch": 0.96455,
|
135811 |
+
"grad_norm": 0.6484375,
|
135812 |
+
"learning_rate": 0.00010688442211055276,
|
135813 |
+
"loss": 2.0552,
|
135814 |
+
"step": 192910
|
135815 |
+
},
|
135816 |
+
{
|
135817 |
+
"epoch": 0.9646,
|
135818 |
+
"grad_norm": 0.62890625,
|
135819 |
+
"learning_rate": 0.00010673366834170854,
|
135820 |
+
"loss": 2.0825,
|
135821 |
+
"step": 192920
|
135822 |
+
},
|
135823 |
+
{
|
135824 |
+
"epoch": 0.96465,
|
135825 |
+
"grad_norm": 0.66796875,
|
135826 |
+
"learning_rate": 0.00010658291457286432,
|
135827 |
+
"loss": 2.0672,
|
135828 |
+
"step": 192930
|
135829 |
+
},
|
135830 |
+
{
|
135831 |
+
"epoch": 0.9647,
|
135832 |
+
"grad_norm": 0.609375,
|
135833 |
+
"learning_rate": 0.0001064321608040201,
|
135834 |
+
"loss": 2.0596,
|
135835 |
+
"step": 192940
|
135836 |
+
},
|
135837 |
+
{
|
135838 |
+
"epoch": 0.96475,
|
135839 |
+
"grad_norm": 0.640625,
|
135840 |
+
"learning_rate": 0.00010628140703517588,
|
135841 |
+
"loss": 2.0168,
|
135842 |
+
"step": 192950
|
135843 |
+
},
|
135844 |
+
{
|
135845 |
+
"epoch": 0.9648,
|
135846 |
+
"grad_norm": 0.5859375,
|
135847 |
+
"learning_rate": 0.00010613065326633166,
|
135848 |
+
"loss": 2.0733,
|
135849 |
+
"step": 192960
|
135850 |
+
},
|
135851 |
+
{
|
135852 |
+
"epoch": 0.96485,
|
135853 |
+
"grad_norm": 0.61328125,
|
135854 |
+
"learning_rate": 0.00010597989949748744,
|
135855 |
+
"loss": 2.1119,
|
135856 |
+
"step": 192970
|
135857 |
+
},
|
135858 |
+
{
|
135859 |
+
"epoch": 0.9649,
|
135860 |
+
"grad_norm": 0.5703125,
|
135861 |
+
"learning_rate": 0.00010582914572864322,
|
135862 |
+
"loss": 2.0478,
|
135863 |
+
"step": 192980
|
135864 |
+
},
|
135865 |
+
{
|
135866 |
+
"epoch": 0.96495,
|
135867 |
+
"grad_norm": 0.61328125,
|
135868 |
+
"learning_rate": 0.000105678391959799,
|
135869 |
+
"loss": 2.0958,
|
135870 |
+
"step": 192990
|
135871 |
+
},
|
135872 |
+
{
|
135873 |
+
"epoch": 0.965,
|
135874 |
+
"grad_norm": 0.578125,
|
135875 |
+
"learning_rate": 0.00010552763819095478,
|
135876 |
+
"loss": 2.044,
|
135877 |
+
"step": 193000
|
135878 |
+
},
|
135879 |
+
{
|
135880 |
+
"epoch": 0.96505,
|
135881 |
+
"grad_norm": 0.58984375,
|
135882 |
+
"learning_rate": 0.00010537688442211056,
|
135883 |
+
"loss": 2.0735,
|
135884 |
+
"step": 193010
|
135885 |
+
},
|
135886 |
+
{
|
135887 |
+
"epoch": 0.9651,
|
135888 |
+
"grad_norm": 0.6640625,
|
135889 |
+
"learning_rate": 0.00010522613065326633,
|
135890 |
+
"loss": 2.013,
|
135891 |
+
"step": 193020
|
135892 |
+
},
|
135893 |
+
{
|
135894 |
+
"epoch": 0.96515,
|
135895 |
+
"grad_norm": 0.6328125,
|
135896 |
+
"learning_rate": 0.00010507537688442211,
|
135897 |
+
"loss": 2.0933,
|
135898 |
+
"step": 193030
|
135899 |
+
},
|
135900 |
+
{
|
135901 |
+
"epoch": 0.9652,
|
135902 |
+
"grad_norm": 0.6015625,
|
135903 |
+
"learning_rate": 0.0001049246231155779,
|
135904 |
+
"loss": 2.0616,
|
135905 |
+
"step": 193040
|
135906 |
+
},
|
135907 |
+
{
|
135908 |
+
"epoch": 0.96525,
|
135909 |
+
"grad_norm": 0.5390625,
|
135910 |
+
"learning_rate": 0.00010477386934673367,
|
135911 |
+
"loss": 2.0499,
|
135912 |
+
"step": 193050
|
135913 |
+
},
|
135914 |
+
{
|
135915 |
+
"epoch": 0.9653,
|
135916 |
+
"grad_norm": 0.65625,
|
135917 |
+
"learning_rate": 0.00010462311557788945,
|
135918 |
+
"loss": 2.1086,
|
135919 |
+
"step": 193060
|
135920 |
+
},
|
135921 |
+
{
|
135922 |
+
"epoch": 0.96535,
|
135923 |
+
"grad_norm": 0.609375,
|
135924 |
+
"learning_rate": 0.00010447236180904523,
|
135925 |
+
"loss": 2.0909,
|
135926 |
+
"step": 193070
|
135927 |
+
},
|
135928 |
+
{
|
135929 |
+
"epoch": 0.9654,
|
135930 |
+
"grad_norm": 0.6796875,
|
135931 |
+
"learning_rate": 0.00010432160804020101,
|
135932 |
+
"loss": 2.0897,
|
135933 |
+
"step": 193080
|
135934 |
+
},
|
135935 |
+
{
|
135936 |
+
"epoch": 0.96545,
|
135937 |
+
"grad_norm": 0.60546875,
|
135938 |
+
"learning_rate": 0.00010417085427135679,
|
135939 |
+
"loss": 2.0741,
|
135940 |
+
"step": 193090
|
135941 |
+
},
|
135942 |
+
{
|
135943 |
+
"epoch": 0.9655,
|
135944 |
+
"grad_norm": 0.6484375,
|
135945 |
+
"learning_rate": 0.00010402010050251256,
|
135946 |
+
"loss": 2.128,
|
135947 |
+
"step": 193100
|
135948 |
+
},
|
135949 |
+
{
|
135950 |
+
"epoch": 0.96555,
|
135951 |
+
"grad_norm": 0.61328125,
|
135952 |
+
"learning_rate": 0.00010386934673366834,
|
135953 |
+
"loss": 2.0221,
|
135954 |
+
"step": 193110
|
135955 |
+
},
|
135956 |
+
{
|
135957 |
+
"epoch": 0.9656,
|
135958 |
+
"grad_norm": 0.625,
|
135959 |
+
"learning_rate": 0.00010371859296482412,
|
135960 |
+
"loss": 2.1525,
|
135961 |
+
"step": 193120
|
135962 |
+
},
|
135963 |
+
{
|
135964 |
+
"epoch": 0.96565,
|
135965 |
+
"grad_norm": 0.69140625,
|
135966 |
+
"learning_rate": 0.0001035678391959799,
|
135967 |
+
"loss": 2.0668,
|
135968 |
+
"step": 193130
|
135969 |
+
},
|
135970 |
+
{
|
135971 |
+
"epoch": 0.9657,
|
135972 |
+
"grad_norm": 0.65625,
|
135973 |
+
"learning_rate": 0.00010341708542713568,
|
135974 |
+
"loss": 2.133,
|
135975 |
+
"step": 193140
|
135976 |
+
},
|
135977 |
+
{
|
135978 |
+
"epoch": 0.96575,
|
135979 |
+
"grad_norm": 0.59765625,
|
135980 |
+
"learning_rate": 0.00010326633165829146,
|
135981 |
+
"loss": 2.0543,
|
135982 |
+
"step": 193150
|
135983 |
+
},
|
135984 |
+
{
|
135985 |
+
"epoch": 0.9658,
|
135986 |
+
"grad_norm": 0.5859375,
|
135987 |
+
"learning_rate": 0.00010311557788944724,
|
135988 |
+
"loss": 2.0828,
|
135989 |
+
"step": 193160
|
135990 |
+
},
|
135991 |
+
{
|
135992 |
+
"epoch": 0.96585,
|
135993 |
+
"grad_norm": 0.5859375,
|
135994 |
+
"learning_rate": 0.00010296482412060302,
|
135995 |
+
"loss": 2.1479,
|
135996 |
+
"step": 193170
|
135997 |
+
},
|
135998 |
+
{
|
135999 |
+
"epoch": 0.9659,
|
136000 |
+
"grad_norm": 0.5625,
|
136001 |
+
"learning_rate": 0.0001028140703517588,
|
136002 |
+
"loss": 2.0801,
|
136003 |
+
"step": 193180
|
136004 |
+
},
|
136005 |
+
{
|
136006 |
+
"epoch": 0.96595,
|
136007 |
+
"grad_norm": 0.6171875,
|
136008 |
+
"learning_rate": 0.00010266331658291458,
|
136009 |
+
"loss": 2.1114,
|
136010 |
+
"step": 193190
|
136011 |
+
},
|
136012 |
+
{
|
136013 |
+
"epoch": 0.966,
|
136014 |
+
"grad_norm": 0.66796875,
|
136015 |
+
"learning_rate": 0.00010251256281407036,
|
136016 |
+
"loss": 2.0793,
|
136017 |
+
"step": 193200
|
136018 |
+
},
|
136019 |
+
{
|
136020 |
+
"epoch": 0.96605,
|
136021 |
+
"grad_norm": 0.63671875,
|
136022 |
+
"learning_rate": 0.00010236180904522614,
|
136023 |
+
"loss": 2.1357,
|
136024 |
+
"step": 193210
|
136025 |
+
},
|
136026 |
+
{
|
136027 |
+
"epoch": 0.9661,
|
136028 |
+
"grad_norm": 0.63671875,
|
136029 |
+
"learning_rate": 0.00010221105527638192,
|
136030 |
+
"loss": 2.0579,
|
136031 |
+
"step": 193220
|
136032 |
+
},
|
136033 |
+
{
|
136034 |
+
"epoch": 0.96615,
|
136035 |
+
"grad_norm": 0.65234375,
|
136036 |
+
"learning_rate": 0.0001020603015075377,
|
136037 |
+
"loss": 2.1389,
|
136038 |
+
"step": 193230
|
136039 |
+
},
|
136040 |
+
{
|
136041 |
+
"epoch": 0.9662,
|
136042 |
+
"grad_norm": 0.55859375,
|
136043 |
+
"learning_rate": 0.00010190954773869348,
|
136044 |
+
"loss": 2.082,
|
136045 |
+
"step": 193240
|
136046 |
+
},
|
136047 |
+
{
|
136048 |
+
"epoch": 0.96625,
|
136049 |
+
"grad_norm": 0.57421875,
|
136050 |
+
"learning_rate": 0.00010175879396984925,
|
136051 |
+
"loss": 2.0782,
|
136052 |
+
"step": 193250
|
136053 |
+
},
|
136054 |
+
{
|
136055 |
+
"epoch": 0.9663,
|
136056 |
+
"grad_norm": 0.62109375,
|
136057 |
+
"learning_rate": 0.00010160804020100503,
|
136058 |
+
"loss": 2.0719,
|
136059 |
+
"step": 193260
|
136060 |
+
},
|
136061 |
+
{
|
136062 |
+
"epoch": 0.96635,
|
136063 |
+
"grad_norm": 0.59375,
|
136064 |
+
"learning_rate": 0.00010145728643216081,
|
136065 |
+
"loss": 2.1004,
|
136066 |
+
"step": 193270
|
136067 |
+
},
|
136068 |
+
{
|
136069 |
+
"epoch": 0.9664,
|
136070 |
+
"grad_norm": 0.58984375,
|
136071 |
+
"learning_rate": 0.0001013065326633166,
|
136072 |
+
"loss": 2.0673,
|
136073 |
+
"step": 193280
|
136074 |
+
},
|
136075 |
+
{
|
136076 |
+
"epoch": 0.96645,
|
136077 |
+
"grad_norm": 0.6171875,
|
136078 |
+
"learning_rate": 0.00010115577889447237,
|
136079 |
+
"loss": 2.0743,
|
136080 |
+
"step": 193290
|
136081 |
+
},
|
136082 |
+
{
|
136083 |
+
"epoch": 0.9665,
|
136084 |
+
"grad_norm": 0.73828125,
|
136085 |
+
"learning_rate": 0.00010100502512562815,
|
136086 |
+
"loss": 2.0939,
|
136087 |
+
"step": 193300
|
136088 |
+
},
|
136089 |
+
{
|
136090 |
+
"epoch": 0.96655,
|
136091 |
+
"grad_norm": 0.5859375,
|
136092 |
+
"learning_rate": 0.00010085427135678392,
|
136093 |
+
"loss": 2.0247,
|
136094 |
+
"step": 193310
|
136095 |
+
},
|
136096 |
+
{
|
136097 |
+
"epoch": 0.9666,
|
136098 |
+
"grad_norm": 0.58203125,
|
136099 |
+
"learning_rate": 0.0001007035175879397,
|
136100 |
+
"loss": 2.1287,
|
136101 |
+
"step": 193320
|
136102 |
+
},
|
136103 |
+
{
|
136104 |
+
"epoch": 0.96665,
|
136105 |
+
"grad_norm": 0.6875,
|
136106 |
+
"learning_rate": 0.00010055276381909548,
|
136107 |
+
"loss": 2.0658,
|
136108 |
+
"step": 193330
|
136109 |
+
},
|
136110 |
+
{
|
136111 |
+
"epoch": 0.9667,
|
136112 |
+
"grad_norm": 0.58203125,
|
136113 |
+
"learning_rate": 0.00010040201005025126,
|
136114 |
+
"loss": 2.1019,
|
136115 |
+
"step": 193340
|
136116 |
+
},
|
136117 |
+
{
|
136118 |
+
"epoch": 0.96675,
|
136119 |
+
"grad_norm": 0.640625,
|
136120 |
+
"learning_rate": 0.00010025125628140704,
|
136121 |
+
"loss": 2.0826,
|
136122 |
+
"step": 193350
|
136123 |
+
},
|
136124 |
+
{
|
136125 |
+
"epoch": 0.9668,
|
136126 |
+
"grad_norm": 0.6328125,
|
136127 |
+
"learning_rate": 0.00010010050251256282,
|
136128 |
+
"loss": 2.0846,
|
136129 |
+
"step": 193360
|
136130 |
+
},
|
136131 |
+
{
|
136132 |
+
"epoch": 0.96685,
|
136133 |
+
"grad_norm": 0.68359375,
|
136134 |
+
"learning_rate": 9.99497487437186e-05,
|
136135 |
+
"loss": 2.1111,
|
136136 |
+
"step": 193370
|
136137 |
+
},
|
136138 |
+
{
|
136139 |
+
"epoch": 0.9669,
|
136140 |
+
"grad_norm": 0.60546875,
|
136141 |
+
"learning_rate": 9.979899497487438e-05,
|
136142 |
+
"loss": 2.0623,
|
136143 |
+
"step": 193380
|
136144 |
+
},
|
136145 |
+
{
|
136146 |
+
"epoch": 0.96695,
|
136147 |
+
"grad_norm": 0.62890625,
|
136148 |
+
"learning_rate": 9.964824120603016e-05,
|
136149 |
+
"loss": 2.1013,
|
136150 |
+
"step": 193390
|
136151 |
+
},
|
136152 |
+
{
|
136153 |
+
"epoch": 0.967,
|
136154 |
+
"grad_norm": 0.58984375,
|
136155 |
+
"learning_rate": 9.949748743718594e-05,
|
136156 |
+
"loss": 2.069,
|
136157 |
+
"step": 193400
|
136158 |
+
},
|
136159 |
+
{
|
136160 |
+
"epoch": 0.96705,
|
136161 |
+
"grad_norm": 0.64453125,
|
136162 |
+
"learning_rate": 9.934673366834172e-05,
|
136163 |
+
"loss": 2.1121,
|
136164 |
+
"step": 193410
|
136165 |
+
},
|
136166 |
+
{
|
136167 |
+
"epoch": 0.9671,
|
136168 |
+
"grad_norm": 0.6328125,
|
136169 |
+
"learning_rate": 9.91959798994975e-05,
|
136170 |
+
"loss": 2.0587,
|
136171 |
+
"step": 193420
|
136172 |
+
},
|
136173 |
+
{
|
136174 |
+
"epoch": 0.96715,
|
136175 |
+
"grad_norm": 0.64453125,
|
136176 |
+
"learning_rate": 9.904522613065328e-05,
|
136177 |
+
"loss": 2.0443,
|
136178 |
+
"step": 193430
|
136179 |
+
},
|
136180 |
+
{
|
136181 |
+
"epoch": 0.9672,
|
136182 |
+
"grad_norm": 0.6171875,
|
136183 |
+
"learning_rate": 9.889447236180906e-05,
|
136184 |
+
"loss": 2.0459,
|
136185 |
+
"step": 193440
|
136186 |
+
},
|
136187 |
+
{
|
136188 |
+
"epoch": 0.96725,
|
136189 |
+
"grad_norm": 0.60546875,
|
136190 |
+
"learning_rate": 9.874371859296484e-05,
|
136191 |
+
"loss": 2.0208,
|
136192 |
+
"step": 193450
|
136193 |
+
},
|
136194 |
+
{
|
136195 |
+
"epoch": 0.9673,
|
136196 |
+
"grad_norm": 0.640625,
|
136197 |
+
"learning_rate": 9.859296482412062e-05,
|
136198 |
+
"loss": 2.1135,
|
136199 |
+
"step": 193460
|
136200 |
+
},
|
136201 |
+
{
|
136202 |
+
"epoch": 0.96735,
|
136203 |
+
"grad_norm": 0.59375,
|
136204 |
+
"learning_rate": 9.84422110552764e-05,
|
136205 |
+
"loss": 2.0626,
|
136206 |
+
"step": 193470
|
136207 |
+
},
|
136208 |
+
{
|
136209 |
+
"epoch": 0.9674,
|
136210 |
+
"grad_norm": 0.6484375,
|
136211 |
+
"learning_rate": 9.829145728643218e-05,
|
136212 |
+
"loss": 2.1517,
|
136213 |
+
"step": 193480
|
136214 |
+
},
|
136215 |
+
{
|
136216 |
+
"epoch": 0.96745,
|
136217 |
+
"grad_norm": 0.56640625,
|
136218 |
+
"learning_rate": 9.814070351758795e-05,
|
136219 |
+
"loss": 2.0249,
|
136220 |
+
"step": 193490
|
136221 |
+
},
|
136222 |
+
{
|
136223 |
+
"epoch": 0.9675,
|
136224 |
+
"grad_norm": 0.56640625,
|
136225 |
+
"learning_rate": 9.798994974874373e-05,
|
136226 |
+
"loss": 2.116,
|
136227 |
+
"step": 193500
|
136228 |
+
},
|
136229 |
+
{
|
136230 |
+
"epoch": 0.96755,
|
136231 |
+
"grad_norm": 0.64453125,
|
136232 |
+
"learning_rate": 9.783919597989951e-05,
|
136233 |
+
"loss": 2.0581,
|
136234 |
+
"step": 193510
|
136235 |
+
},
|
136236 |
+
{
|
136237 |
+
"epoch": 0.9676,
|
136238 |
+
"grad_norm": 0.6328125,
|
136239 |
+
"learning_rate": 9.768844221105528e-05,
|
136240 |
+
"loss": 2.1323,
|
136241 |
+
"step": 193520
|
136242 |
+
},
|
136243 |
+
{
|
136244 |
+
"epoch": 0.96765,
|
136245 |
+
"grad_norm": 0.67578125,
|
136246 |
+
"learning_rate": 9.753768844221106e-05,
|
136247 |
+
"loss": 2.0611,
|
136248 |
+
"step": 193530
|
136249 |
+
},
|
136250 |
+
{
|
136251 |
+
"epoch": 0.9677,
|
136252 |
+
"grad_norm": 0.64453125,
|
136253 |
+
"learning_rate": 9.738693467336684e-05,
|
136254 |
+
"loss": 2.0753,
|
136255 |
+
"step": 193540
|
136256 |
+
},
|
136257 |
+
{
|
136258 |
+
"epoch": 0.96775,
|
136259 |
+
"grad_norm": 0.640625,
|
136260 |
+
"learning_rate": 9.723618090452262e-05,
|
136261 |
+
"loss": 2.0894,
|
136262 |
+
"step": 193550
|
136263 |
+
},
|
136264 |
+
{
|
136265 |
+
"epoch": 0.9678,
|
136266 |
+
"grad_norm": 0.578125,
|
136267 |
+
"learning_rate": 9.70854271356784e-05,
|
136268 |
+
"loss": 2.0414,
|
136269 |
+
"step": 193560
|
136270 |
+
},
|
136271 |
+
{
|
136272 |
+
"epoch": 0.96785,
|
136273 |
+
"grad_norm": 0.69921875,
|
136274 |
+
"learning_rate": 9.693467336683418e-05,
|
136275 |
+
"loss": 2.0803,
|
136276 |
+
"step": 193570
|
136277 |
+
},
|
136278 |
+
{
|
136279 |
+
"epoch": 0.9679,
|
136280 |
+
"grad_norm": 0.56640625,
|
136281 |
+
"learning_rate": 9.678391959798996e-05,
|
136282 |
+
"loss": 2.0275,
|
136283 |
+
"step": 193580
|
136284 |
+
},
|
136285 |
+
{
|
136286 |
+
"epoch": 0.96795,
|
136287 |
+
"grad_norm": 0.60546875,
|
136288 |
+
"learning_rate": 9.663316582914574e-05,
|
136289 |
+
"loss": 2.1239,
|
136290 |
+
"step": 193590
|
136291 |
+
},
|
136292 |
+
{
|
136293 |
+
"epoch": 0.968,
|
136294 |
+
"grad_norm": 0.5859375,
|
136295 |
+
"learning_rate": 9.648241206030152e-05,
|
136296 |
+
"loss": 2.0337,
|
136297 |
+
"step": 193600
|
136298 |
+
},
|
136299 |
+
{
|
136300 |
+
"epoch": 0.96805,
|
136301 |
+
"grad_norm": 0.62109375,
|
136302 |
+
"learning_rate": 9.63316582914573e-05,
|
136303 |
+
"loss": 2.0741,
|
136304 |
+
"step": 193610
|
136305 |
+
},
|
136306 |
+
{
|
136307 |
+
"epoch": 0.9681,
|
136308 |
+
"grad_norm": 0.66015625,
|
136309 |
+
"learning_rate": 9.618090452261308e-05,
|
136310 |
+
"loss": 2.0599,
|
136311 |
+
"step": 193620
|
136312 |
+
},
|
136313 |
+
{
|
136314 |
+
"epoch": 0.96815,
|
136315 |
+
"grad_norm": 0.62890625,
|
136316 |
+
"learning_rate": 9.603015075376886e-05,
|
136317 |
+
"loss": 2.0559,
|
136318 |
+
"step": 193630
|
136319 |
+
},
|
136320 |
+
{
|
136321 |
+
"epoch": 0.9682,
|
136322 |
+
"grad_norm": 0.640625,
|
136323 |
+
"learning_rate": 9.587939698492461e-05,
|
136324 |
+
"loss": 2.1606,
|
136325 |
+
"step": 193640
|
136326 |
+
},
|
136327 |
+
{
|
136328 |
+
"epoch": 0.96825,
|
136329 |
+
"grad_norm": 0.62109375,
|
136330 |
+
"learning_rate": 9.572864321608039e-05,
|
136331 |
+
"loss": 2.0655,
|
136332 |
+
"step": 193650
|
136333 |
+
},
|
136334 |
+
{
|
136335 |
+
"epoch": 0.9683,
|
136336 |
+
"grad_norm": 0.58203125,
|
136337 |
+
"learning_rate": 9.557788944723617e-05,
|
136338 |
+
"loss": 2.0956,
|
136339 |
+
"step": 193660
|
136340 |
+
},
|
136341 |
+
{
|
136342 |
+
"epoch": 0.96835,
|
136343 |
+
"grad_norm": 0.6328125,
|
136344 |
+
"learning_rate": 9.542713567839195e-05,
|
136345 |
+
"loss": 2.0487,
|
136346 |
+
"step": 193670
|
136347 |
+
},
|
136348 |
+
{
|
136349 |
+
"epoch": 0.9684,
|
136350 |
+
"grad_norm": 0.60546875,
|
136351 |
+
"learning_rate": 9.527638190954773e-05,
|
136352 |
+
"loss": 2.0721,
|
136353 |
+
"step": 193680
|
136354 |
+
},
|
136355 |
+
{
|
136356 |
+
"epoch": 0.96845,
|
136357 |
+
"grad_norm": 0.62109375,
|
136358 |
+
"learning_rate": 9.512562814070351e-05,
|
136359 |
+
"loss": 2.108,
|
136360 |
+
"step": 193690
|
136361 |
+
},
|
136362 |
+
{
|
136363 |
+
"epoch": 0.9685,
|
136364 |
+
"grad_norm": 0.6484375,
|
136365 |
+
"learning_rate": 9.497487437185929e-05,
|
136366 |
+
"loss": 2.09,
|
136367 |
+
"step": 193700
|
136368 |
+
},
|
136369 |
+
{
|
136370 |
+
"epoch": 0.96855,
|
136371 |
+
"grad_norm": 0.61328125,
|
136372 |
+
"learning_rate": 9.482412060301507e-05,
|
136373 |
+
"loss": 2.0447,
|
136374 |
+
"step": 193710
|
136375 |
+
},
|
136376 |
+
{
|
136377 |
+
"epoch": 0.9686,
|
136378 |
+
"grad_norm": 0.578125,
|
136379 |
+
"learning_rate": 9.467336683417085e-05,
|
136380 |
+
"loss": 2.0791,
|
136381 |
+
"step": 193720
|
136382 |
+
},
|
136383 |
+
{
|
136384 |
+
"epoch": 0.96865,
|
136385 |
+
"grad_norm": 0.5859375,
|
136386 |
+
"learning_rate": 9.452261306532663e-05,
|
136387 |
+
"loss": 2.1149,
|
136388 |
+
"step": 193730
|
136389 |
+
},
|
136390 |
+
{
|
136391 |
+
"epoch": 0.9687,
|
136392 |
+
"grad_norm": 0.59765625,
|
136393 |
+
"learning_rate": 9.437185929648241e-05,
|
136394 |
+
"loss": 2.0379,
|
136395 |
+
"step": 193740
|
136396 |
+
},
|
136397 |
+
{
|
136398 |
+
"epoch": 0.96875,
|
136399 |
+
"grad_norm": 0.53125,
|
136400 |
+
"learning_rate": 9.422110552763819e-05,
|
136401 |
+
"loss": 2.0959,
|
136402 |
+
"step": 193750
|
136403 |
+
},
|
136404 |
+
{
|
136405 |
+
"epoch": 0.9688,
|
136406 |
+
"grad_norm": 0.58984375,
|
136407 |
+
"learning_rate": 9.407035175879397e-05,
|
136408 |
+
"loss": 2.0678,
|
136409 |
+
"step": 193760
|
136410 |
+
},
|
136411 |
+
{
|
136412 |
+
"epoch": 0.96885,
|
136413 |
+
"grad_norm": 0.66796875,
|
136414 |
+
"learning_rate": 9.391959798994975e-05,
|
136415 |
+
"loss": 2.1388,
|
136416 |
+
"step": 193770
|
136417 |
+
},
|
136418 |
+
{
|
136419 |
+
"epoch": 0.9689,
|
136420 |
+
"grad_norm": 0.60546875,
|
136421 |
+
"learning_rate": 9.376884422110553e-05,
|
136422 |
+
"loss": 2.09,
|
136423 |
+
"step": 193780
|
136424 |
+
},
|
136425 |
+
{
|
136426 |
+
"epoch": 0.96895,
|
136427 |
+
"grad_norm": 0.60546875,
|
136428 |
+
"learning_rate": 9.36180904522613e-05,
|
136429 |
+
"loss": 2.1365,
|
136430 |
+
"step": 193790
|
136431 |
+
},
|
136432 |
+
{
|
136433 |
+
"epoch": 0.969,
|
136434 |
+
"grad_norm": 0.609375,
|
136435 |
+
"learning_rate": 9.346733668341709e-05,
|
136436 |
+
"loss": 2.0655,
|
136437 |
+
"step": 193800
|
136438 |
+
},
|
136439 |
+
{
|
136440 |
+
"epoch": 0.96905,
|
136441 |
+
"grad_norm": 0.57421875,
|
136442 |
+
"learning_rate": 9.331658291457287e-05,
|
136443 |
+
"loss": 2.0588,
|
136444 |
+
"step": 193810
|
136445 |
+
},
|
136446 |
+
{
|
136447 |
+
"epoch": 0.9691,
|
136448 |
+
"grad_norm": 0.6640625,
|
136449 |
+
"learning_rate": 9.316582914572864e-05,
|
136450 |
+
"loss": 2.1514,
|
136451 |
+
"step": 193820
|
136452 |
+
},
|
136453 |
+
{
|
136454 |
+
"epoch": 0.96915,
|
136455 |
+
"grad_norm": 0.67578125,
|
136456 |
+
"learning_rate": 9.301507537688442e-05,
|
136457 |
+
"loss": 2.0114,
|
136458 |
+
"step": 193830
|
136459 |
+
},
|
136460 |
+
{
|
136461 |
+
"epoch": 0.9692,
|
136462 |
+
"grad_norm": 0.56640625,
|
136463 |
+
"learning_rate": 9.28643216080402e-05,
|
136464 |
+
"loss": 2.0999,
|
136465 |
+
"step": 193840
|
136466 |
+
},
|
136467 |
+
{
|
136468 |
+
"epoch": 0.96925,
|
136469 |
+
"grad_norm": 0.6328125,
|
136470 |
+
"learning_rate": 9.271356783919598e-05,
|
136471 |
+
"loss": 2.0517,
|
136472 |
+
"step": 193850
|
136473 |
+
},
|
136474 |
+
{
|
136475 |
+
"epoch": 0.9693,
|
136476 |
+
"grad_norm": 0.6796875,
|
136477 |
+
"learning_rate": 9.256281407035176e-05,
|
136478 |
+
"loss": 2.1083,
|
136479 |
+
"step": 193860
|
136480 |
+
},
|
136481 |
+
{
|
136482 |
+
"epoch": 0.96935,
|
136483 |
+
"grad_norm": 0.58984375,
|
136484 |
+
"learning_rate": 9.241206030150754e-05,
|
136485 |
+
"loss": 2.0611,
|
136486 |
+
"step": 193870
|
136487 |
+
},
|
136488 |
+
{
|
136489 |
+
"epoch": 0.9694,
|
136490 |
+
"grad_norm": 0.625,
|
136491 |
+
"learning_rate": 9.226130653266332e-05,
|
136492 |
+
"loss": 2.0811,
|
136493 |
+
"step": 193880
|
136494 |
+
},
|
136495 |
+
{
|
136496 |
+
"epoch": 0.96945,
|
136497 |
+
"grad_norm": 0.58984375,
|
136498 |
+
"learning_rate": 9.21105527638191e-05,
|
136499 |
+
"loss": 2.0413,
|
136500 |
+
"step": 193890
|
136501 |
+
},
|
136502 |
+
{
|
136503 |
+
"epoch": 0.9695,
|
136504 |
+
"grad_norm": 0.625,
|
136505 |
+
"learning_rate": 9.195979899497488e-05,
|
136506 |
+
"loss": 2.0435,
|
136507 |
+
"step": 193900
|
136508 |
+
},
|
136509 |
+
{
|
136510 |
+
"epoch": 0.96955,
|
136511 |
+
"grad_norm": 0.625,
|
136512 |
+
"learning_rate": 9.180904522613066e-05,
|
136513 |
+
"loss": 2.1016,
|
136514 |
+
"step": 193910
|
136515 |
+
},
|
136516 |
+
{
|
136517 |
+
"epoch": 0.9696,
|
136518 |
+
"grad_norm": 0.58203125,
|
136519 |
+
"learning_rate": 9.165829145728644e-05,
|
136520 |
+
"loss": 2.1049,
|
136521 |
+
"step": 193920
|
136522 |
+
},
|
136523 |
+
{
|
136524 |
+
"epoch": 0.96965,
|
136525 |
+
"grad_norm": 0.609375,
|
136526 |
+
"learning_rate": 9.150753768844221e-05,
|
136527 |
+
"loss": 2.0746,
|
136528 |
+
"step": 193930
|
136529 |
+
},
|
136530 |
+
{
|
136531 |
+
"epoch": 0.9697,
|
136532 |
+
"grad_norm": 0.66796875,
|
136533 |
+
"learning_rate": 9.135678391959799e-05,
|
136534 |
+
"loss": 2.0927,
|
136535 |
+
"step": 193940
|
136536 |
+
},
|
136537 |
+
{
|
136538 |
+
"epoch": 0.96975,
|
136539 |
+
"grad_norm": 0.63671875,
|
136540 |
+
"learning_rate": 9.120603015075377e-05,
|
136541 |
+
"loss": 2.109,
|
136542 |
+
"step": 193950
|
136543 |
+
},
|
136544 |
+
{
|
136545 |
+
"epoch": 0.9698,
|
136546 |
+
"grad_norm": 0.66796875,
|
136547 |
+
"learning_rate": 9.105527638190955e-05,
|
136548 |
+
"loss": 2.0889,
|
136549 |
+
"step": 193960
|
136550 |
+
},
|
136551 |
+
{
|
136552 |
+
"epoch": 0.96985,
|
136553 |
+
"grad_norm": 0.60546875,
|
136554 |
+
"learning_rate": 9.090452261306533e-05,
|
136555 |
+
"loss": 2.0564,
|
136556 |
+
"step": 193970
|
136557 |
+
},
|
136558 |
+
{
|
136559 |
+
"epoch": 0.9699,
|
136560 |
+
"grad_norm": 0.7265625,
|
136561 |
+
"learning_rate": 9.075376884422111e-05,
|
136562 |
+
"loss": 2.0829,
|
136563 |
+
"step": 193980
|
136564 |
+
},
|
136565 |
+
{
|
136566 |
+
"epoch": 0.96995,
|
136567 |
+
"grad_norm": 0.6953125,
|
136568 |
+
"learning_rate": 9.060301507537689e-05,
|
136569 |
+
"loss": 2.0556,
|
136570 |
+
"step": 193990
|
136571 |
+
},
|
136572 |
+
{
|
136573 |
+
"epoch": 0.97,
|
136574 |
+
"grad_norm": 0.6328125,
|
136575 |
+
"learning_rate": 9.045226130653267e-05,
|
136576 |
+
"loss": 2.1009,
|
136577 |
+
"step": 194000
|
136578 |
+
},
|
136579 |
+
{
|
136580 |
+
"epoch": 0.97,
|
136581 |
+
"eval_loss": 2.07601261138916,
|
136582 |
+
"eval_runtime": 47.1258,
|
136583 |
+
"eval_samples_per_second": 53.049,
|
136584 |
+
"eval_steps_per_second": 0.106,
|
136585 |
+
"step": 194000
|
136586 |
}
|
136587 |
],
|
136588 |
"logging_steps": 10,
|
|
|
136602 |
"attributes": {}
|
136603 |
}
|
136604 |
},
|
136605 |
+
"total_flos": 5.130708755205325e+18,
|
136606 |
"train_batch_size": 64,
|
136607 |
"trial_name": null,
|
136608 |
"trial_params": null
|