dima806 commited on
Commit
ba1e2de
1 Parent(s): 15bbf7a

Upload folder using huggingface_hub

Browse files
checkpoint-4900/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "curly",
13
+ "1": "dreadlocks",
14
+ "2": "kinky",
15
+ "3": "straight",
16
+ "4": "wavy"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "curly": 0,
23
+ "dreadlocks": 1,
24
+ "kinky": 2,
25
+ "straight": 3,
26
+ "wavy": 4
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.45.2"
38
+ }
checkpoint-4900/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880839388dee970fc3c9a41a9ea13295c6093fea1653d77780af5a05743c9cf2
3
+ size 343233204
checkpoint-4900/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02b69d9cb3517d77d6efff670a7edd44fb0565ba5a9d703e54737429f44a9ff
3
+ size 686587322
checkpoint-4900/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-4900/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6d96f7ee624f7c57ede23ba0321ef010817bd064e65eb4db242f525babee44
3
+ size 14244
checkpoint-4900/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8ff882b25de7a093a1ca37813f32aa219aeac50ec93044513164d388e33b66
3
+ size 1064
checkpoint-4900/trainer_state.json ADDED
@@ -0,0 +1,1096 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5298713445663452,
3
+ "best_model_checkpoint": "hair_type_image_detection/checkpoint-4900",
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4900,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.17704280155642024,
14
+ "eval_loss": 1.6219326257705688,
15
+ "eval_model_preparation_time": 0.0053,
16
+ "eval_runtime": 16.3274,
17
+ "eval_samples_per_second": 62.962,
18
+ "eval_steps_per_second": 7.901,
19
+ "step": 49
20
+ },
21
+ {
22
+ "epoch": 2.0,
23
+ "eval_accuracy": 0.22568093385214008,
24
+ "eval_loss": 1.6067438125610352,
25
+ "eval_model_preparation_time": 0.0053,
26
+ "eval_runtime": 16.2468,
27
+ "eval_samples_per_second": 63.274,
28
+ "eval_steps_per_second": 7.94,
29
+ "step": 98
30
+ },
31
+ {
32
+ "epoch": 3.0,
33
+ "eval_accuracy": 0.27529182879377434,
34
+ "eval_loss": 1.5912580490112305,
35
+ "eval_model_preparation_time": 0.0053,
36
+ "eval_runtime": 16.4806,
37
+ "eval_samples_per_second": 62.377,
38
+ "eval_steps_per_second": 7.827,
39
+ "step": 147
40
+ },
41
+ {
42
+ "epoch": 4.0,
43
+ "eval_accuracy": 0.33073929961089493,
44
+ "eval_loss": 1.5748045444488525,
45
+ "eval_model_preparation_time": 0.0053,
46
+ "eval_runtime": 16.0231,
47
+ "eval_samples_per_second": 64.157,
48
+ "eval_steps_per_second": 8.051,
49
+ "step": 196
50
+ },
51
+ {
52
+ "epoch": 5.0,
53
+ "eval_accuracy": 0.39299610894941633,
54
+ "eval_loss": 1.5561603307724,
55
+ "eval_model_preparation_time": 0.0053,
56
+ "eval_runtime": 16.3268,
57
+ "eval_samples_per_second": 62.964,
58
+ "eval_steps_per_second": 7.901,
59
+ "step": 245
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_accuracy": 0.46400778210116733,
64
+ "eval_loss": 1.5354892015457153,
65
+ "eval_model_preparation_time": 0.0053,
66
+ "eval_runtime": 16.3682,
67
+ "eval_samples_per_second": 62.805,
68
+ "eval_steps_per_second": 7.881,
69
+ "step": 294
70
+ },
71
+ {
72
+ "epoch": 7.0,
73
+ "eval_accuracy": 0.5223735408560312,
74
+ "eval_loss": 1.5132097005844116,
75
+ "eval_model_preparation_time": 0.0053,
76
+ "eval_runtime": 16.1784,
77
+ "eval_samples_per_second": 63.542,
78
+ "eval_steps_per_second": 7.974,
79
+ "step": 343
80
+ },
81
+ {
82
+ "epoch": 8.0,
83
+ "eval_accuracy": 0.5700389105058365,
84
+ "eval_loss": 1.4904340505599976,
85
+ "eval_model_preparation_time": 0.0053,
86
+ "eval_runtime": 15.9362,
87
+ "eval_samples_per_second": 64.507,
88
+ "eval_steps_per_second": 8.095,
89
+ "step": 392
90
+ },
91
+ {
92
+ "epoch": 9.0,
93
+ "eval_accuracy": 0.6138132295719845,
94
+ "eval_loss": 1.4679334163665771,
95
+ "eval_model_preparation_time": 0.0053,
96
+ "eval_runtime": 16.4082,
97
+ "eval_samples_per_second": 62.652,
98
+ "eval_steps_per_second": 7.862,
99
+ "step": 441
100
+ },
101
+ {
102
+ "epoch": 10.0,
103
+ "eval_accuracy": 0.6517509727626459,
104
+ "eval_loss": 1.4443553686141968,
105
+ "eval_model_preparation_time": 0.0053,
106
+ "eval_runtime": 16.0563,
107
+ "eval_samples_per_second": 64.025,
108
+ "eval_steps_per_second": 8.034,
109
+ "step": 490
110
+ },
111
+ {
112
+ "epoch": 10.204081632653061,
113
+ "grad_norm": 1.3639816045761108,
114
+ "learning_rate": 9.072164948453608e-07,
115
+ "loss": 1.5205,
116
+ "step": 500
117
+ },
118
+ {
119
+ "epoch": 11.0,
120
+ "eval_accuracy": 0.683852140077821,
121
+ "eval_loss": 1.4206485748291016,
122
+ "eval_model_preparation_time": 0.0053,
123
+ "eval_runtime": 16.0088,
124
+ "eval_samples_per_second": 64.214,
125
+ "eval_steps_per_second": 8.058,
126
+ "step": 539
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_accuracy": 0.7178988326848249,
131
+ "eval_loss": 1.3944063186645508,
132
+ "eval_model_preparation_time": 0.0053,
133
+ "eval_runtime": 15.9211,
134
+ "eval_samples_per_second": 64.568,
135
+ "eval_steps_per_second": 8.102,
136
+ "step": 588
137
+ },
138
+ {
139
+ "epoch": 13.0,
140
+ "eval_accuracy": 0.7441634241245136,
141
+ "eval_loss": 1.3676106929779053,
142
+ "eval_model_preparation_time": 0.0053,
143
+ "eval_runtime": 16.1365,
144
+ "eval_samples_per_second": 63.706,
145
+ "eval_steps_per_second": 7.994,
146
+ "step": 637
147
+ },
148
+ {
149
+ "epoch": 14.0,
150
+ "eval_accuracy": 0.7684824902723736,
151
+ "eval_loss": 1.3409146070480347,
152
+ "eval_model_preparation_time": 0.0053,
153
+ "eval_runtime": 16.0749,
154
+ "eval_samples_per_second": 63.95,
155
+ "eval_steps_per_second": 8.025,
156
+ "step": 686
157
+ },
158
+ {
159
+ "epoch": 15.0,
160
+ "eval_accuracy": 0.7830739299610895,
161
+ "eval_loss": 1.312709927558899,
162
+ "eval_model_preparation_time": 0.0053,
163
+ "eval_runtime": 16.1852,
164
+ "eval_samples_per_second": 63.515,
165
+ "eval_steps_per_second": 7.97,
166
+ "step": 735
167
+ },
168
+ {
169
+ "epoch": 16.0,
170
+ "eval_accuracy": 0.7976653696498055,
171
+ "eval_loss": 1.2852919101715088,
172
+ "eval_model_preparation_time": 0.0053,
173
+ "eval_runtime": 16.5033,
174
+ "eval_samples_per_second": 62.291,
175
+ "eval_steps_per_second": 7.817,
176
+ "step": 784
177
+ },
178
+ {
179
+ "epoch": 17.0,
180
+ "eval_accuracy": 0.8151750972762646,
181
+ "eval_loss": 1.2579333782196045,
182
+ "eval_model_preparation_time": 0.0053,
183
+ "eval_runtime": 16.716,
184
+ "eval_samples_per_second": 61.498,
185
+ "eval_steps_per_second": 7.717,
186
+ "step": 833
187
+ },
188
+ {
189
+ "epoch": 18.0,
190
+ "eval_accuracy": 0.8258754863813229,
191
+ "eval_loss": 1.2285292148590088,
192
+ "eval_model_preparation_time": 0.0053,
193
+ "eval_runtime": 16.3546,
194
+ "eval_samples_per_second": 62.857,
195
+ "eval_steps_per_second": 7.888,
196
+ "step": 882
197
+ },
198
+ {
199
+ "epoch": 19.0,
200
+ "eval_accuracy": 0.8317120622568094,
201
+ "eval_loss": 1.2019933462142944,
202
+ "eval_model_preparation_time": 0.0053,
203
+ "eval_runtime": 16.435,
204
+ "eval_samples_per_second": 62.549,
205
+ "eval_steps_per_second": 7.849,
206
+ "step": 931
207
+ },
208
+ {
209
+ "epoch": 20.0,
210
+ "eval_accuracy": 0.8404669260700389,
211
+ "eval_loss": 1.1748104095458984,
212
+ "eval_model_preparation_time": 0.0053,
213
+ "eval_runtime": 16.4968,
214
+ "eval_samples_per_second": 62.315,
215
+ "eval_steps_per_second": 7.82,
216
+ "step": 980
217
+ },
218
+ {
219
+ "epoch": 20.408163265306122,
220
+ "grad_norm": 1.599041223526001,
221
+ "learning_rate": 8.041237113402062e-07,
222
+ "loss": 1.2347,
223
+ "step": 1000
224
+ },
225
+ {
226
+ "epoch": 21.0,
227
+ "eval_accuracy": 0.8492217898832685,
228
+ "eval_loss": 1.1474989652633667,
229
+ "eval_model_preparation_time": 0.0053,
230
+ "eval_runtime": 16.0812,
231
+ "eval_samples_per_second": 63.925,
232
+ "eval_steps_per_second": 8.022,
233
+ "step": 1029
234
+ },
235
+ {
236
+ "epoch": 22.0,
237
+ "eval_accuracy": 0.8521400778210116,
238
+ "eval_loss": 1.121047019958496,
239
+ "eval_model_preparation_time": 0.0053,
240
+ "eval_runtime": 16.0658,
241
+ "eval_samples_per_second": 63.987,
242
+ "eval_steps_per_second": 8.03,
243
+ "step": 1078
244
+ },
245
+ {
246
+ "epoch": 23.0,
247
+ "eval_accuracy": 0.8628404669260701,
248
+ "eval_loss": 1.0952664613723755,
249
+ "eval_model_preparation_time": 0.0053,
250
+ "eval_runtime": 16.283,
251
+ "eval_samples_per_second": 63.133,
252
+ "eval_steps_per_second": 7.922,
253
+ "step": 1127
254
+ },
255
+ {
256
+ "epoch": 24.0,
257
+ "eval_accuracy": 0.8638132295719845,
258
+ "eval_loss": 1.070285439491272,
259
+ "eval_model_preparation_time": 0.0053,
260
+ "eval_runtime": 15.9544,
261
+ "eval_samples_per_second": 64.434,
262
+ "eval_steps_per_second": 8.086,
263
+ "step": 1176
264
+ },
265
+ {
266
+ "epoch": 25.0,
267
+ "eval_accuracy": 0.8638132295719845,
268
+ "eval_loss": 1.046401023864746,
269
+ "eval_model_preparation_time": 0.0053,
270
+ "eval_runtime": 16.1019,
271
+ "eval_samples_per_second": 63.843,
272
+ "eval_steps_per_second": 8.011,
273
+ "step": 1225
274
+ },
275
+ {
276
+ "epoch": 26.0,
277
+ "eval_accuracy": 0.8686770428015564,
278
+ "eval_loss": 1.0234460830688477,
279
+ "eval_model_preparation_time": 0.0053,
280
+ "eval_runtime": 16.0897,
281
+ "eval_samples_per_second": 63.892,
282
+ "eval_steps_per_second": 8.018,
283
+ "step": 1274
284
+ },
285
+ {
286
+ "epoch": 27.0,
287
+ "eval_accuracy": 0.8696498054474708,
288
+ "eval_loss": 1.0003376007080078,
289
+ "eval_model_preparation_time": 0.0053,
290
+ "eval_runtime": 16.2835,
291
+ "eval_samples_per_second": 63.131,
292
+ "eval_steps_per_second": 7.922,
293
+ "step": 1323
294
+ },
295
+ {
296
+ "epoch": 28.0,
297
+ "eval_accuracy": 0.8696498054474708,
298
+ "eval_loss": 0.9786379337310791,
299
+ "eval_model_preparation_time": 0.0053,
300
+ "eval_runtime": 16.2428,
301
+ "eval_samples_per_second": 63.29,
302
+ "eval_steps_per_second": 7.942,
303
+ "step": 1372
304
+ },
305
+ {
306
+ "epoch": 29.0,
307
+ "eval_accuracy": 0.8715953307392996,
308
+ "eval_loss": 0.9573078751564026,
309
+ "eval_model_preparation_time": 0.0053,
310
+ "eval_runtime": 15.9987,
311
+ "eval_samples_per_second": 64.255,
312
+ "eval_steps_per_second": 8.063,
313
+ "step": 1421
314
+ },
315
+ {
316
+ "epoch": 30.0,
317
+ "eval_accuracy": 0.872568093385214,
318
+ "eval_loss": 0.9362879395484924,
319
+ "eval_model_preparation_time": 0.0053,
320
+ "eval_runtime": 16.1691,
321
+ "eval_samples_per_second": 63.578,
322
+ "eval_steps_per_second": 7.978,
323
+ "step": 1470
324
+ },
325
+ {
326
+ "epoch": 30.612244897959183,
327
+ "grad_norm": 1.5271226167678833,
328
+ "learning_rate": 7.010309278350515e-07,
329
+ "loss": 0.9467,
330
+ "step": 1500
331
+ },
332
+ {
333
+ "epoch": 31.0,
334
+ "eval_accuracy": 0.8754863813229572,
335
+ "eval_loss": 0.9171885848045349,
336
+ "eval_model_preparation_time": 0.0053,
337
+ "eval_runtime": 16.3696,
338
+ "eval_samples_per_second": 62.799,
339
+ "eval_steps_per_second": 7.88,
340
+ "step": 1519
341
+ },
342
+ {
343
+ "epoch": 32.0,
344
+ "eval_accuracy": 0.8803501945525292,
345
+ "eval_loss": 0.8980955481529236,
346
+ "eval_model_preparation_time": 0.0053,
347
+ "eval_runtime": 16.4392,
348
+ "eval_samples_per_second": 62.533,
349
+ "eval_steps_per_second": 7.847,
350
+ "step": 1568
351
+ },
352
+ {
353
+ "epoch": 33.0,
354
+ "eval_accuracy": 0.8861867704280155,
355
+ "eval_loss": 0.8809771537780762,
356
+ "eval_model_preparation_time": 0.0053,
357
+ "eval_runtime": 16.4449,
358
+ "eval_samples_per_second": 62.512,
359
+ "eval_steps_per_second": 7.844,
360
+ "step": 1617
361
+ },
362
+ {
363
+ "epoch": 34.0,
364
+ "eval_accuracy": 0.8861867704280155,
365
+ "eval_loss": 0.8635137677192688,
366
+ "eval_model_preparation_time": 0.0053,
367
+ "eval_runtime": 16.4485,
368
+ "eval_samples_per_second": 62.498,
369
+ "eval_steps_per_second": 7.843,
370
+ "step": 1666
371
+ },
372
+ {
373
+ "epoch": 35.0,
374
+ "eval_accuracy": 0.8881322957198443,
375
+ "eval_loss": 0.8468510508537292,
376
+ "eval_model_preparation_time": 0.0053,
377
+ "eval_runtime": 16.2536,
378
+ "eval_samples_per_second": 63.247,
379
+ "eval_steps_per_second": 7.937,
380
+ "step": 1715
381
+ },
382
+ {
383
+ "epoch": 36.0,
384
+ "eval_accuracy": 0.8881322957198443,
385
+ "eval_loss": 0.8328165411949158,
386
+ "eval_model_preparation_time": 0.0053,
387
+ "eval_runtime": 16.503,
388
+ "eval_samples_per_second": 62.292,
389
+ "eval_steps_per_second": 7.817,
390
+ "step": 1764
391
+ },
392
+ {
393
+ "epoch": 37.0,
394
+ "eval_accuracy": 0.8881322957198443,
395
+ "eval_loss": 0.8181366920471191,
396
+ "eval_model_preparation_time": 0.0053,
397
+ "eval_runtime": 16.8306,
398
+ "eval_samples_per_second": 61.079,
399
+ "eval_steps_per_second": 7.665,
400
+ "step": 1813
401
+ },
402
+ {
403
+ "epoch": 38.0,
404
+ "eval_accuracy": 0.8891050583657587,
405
+ "eval_loss": 0.8040880560874939,
406
+ "eval_model_preparation_time": 0.0053,
407
+ "eval_runtime": 16.772,
408
+ "eval_samples_per_second": 61.293,
409
+ "eval_steps_per_second": 7.691,
410
+ "step": 1862
411
+ },
412
+ {
413
+ "epoch": 39.0,
414
+ "eval_accuracy": 0.8891050583657587,
415
+ "eval_loss": 0.790228009223938,
416
+ "eval_model_preparation_time": 0.0053,
417
+ "eval_runtime": 16.3652,
418
+ "eval_samples_per_second": 62.816,
419
+ "eval_steps_per_second": 7.883,
420
+ "step": 1911
421
+ },
422
+ {
423
+ "epoch": 40.0,
424
+ "eval_accuracy": 0.8910505836575876,
425
+ "eval_loss": 0.7775859832763672,
426
+ "eval_model_preparation_time": 0.0053,
427
+ "eval_runtime": 16.2631,
428
+ "eval_samples_per_second": 63.211,
429
+ "eval_steps_per_second": 7.932,
430
+ "step": 1960
431
+ },
432
+ {
433
+ "epoch": 40.816326530612244,
434
+ "grad_norm": 1.7460684776306152,
435
+ "learning_rate": 5.979381443298969e-07,
436
+ "loss": 0.7398,
437
+ "step": 2000
438
+ },
439
+ {
440
+ "epoch": 41.0,
441
+ "eval_accuracy": 0.892023346303502,
442
+ "eval_loss": 0.766076922416687,
443
+ "eval_model_preparation_time": 0.0053,
444
+ "eval_runtime": 16.3183,
445
+ "eval_samples_per_second": 62.997,
446
+ "eval_steps_per_second": 7.905,
447
+ "step": 2009
448
+ },
449
+ {
450
+ "epoch": 42.0,
451
+ "eval_accuracy": 0.8949416342412452,
452
+ "eval_loss": 0.7547051906585693,
453
+ "eval_model_preparation_time": 0.0053,
454
+ "eval_runtime": 16.274,
455
+ "eval_samples_per_second": 63.168,
456
+ "eval_steps_per_second": 7.927,
457
+ "step": 2058
458
+ },
459
+ {
460
+ "epoch": 43.0,
461
+ "eval_accuracy": 0.8998054474708171,
462
+ "eval_loss": 0.7428992986679077,
463
+ "eval_model_preparation_time": 0.0053,
464
+ "eval_runtime": 16.2531,
465
+ "eval_samples_per_second": 63.25,
466
+ "eval_steps_per_second": 7.937,
467
+ "step": 2107
468
+ },
469
+ {
470
+ "epoch": 44.0,
471
+ "eval_accuracy": 0.9027237354085603,
472
+ "eval_loss": 0.7334992289543152,
473
+ "eval_model_preparation_time": 0.0053,
474
+ "eval_runtime": 16.1775,
475
+ "eval_samples_per_second": 63.545,
476
+ "eval_steps_per_second": 7.974,
477
+ "step": 2156
478
+ },
479
+ {
480
+ "epoch": 45.0,
481
+ "eval_accuracy": 0.9027237354085603,
482
+ "eval_loss": 0.7230122089385986,
483
+ "eval_model_preparation_time": 0.0053,
484
+ "eval_runtime": 16.4977,
485
+ "eval_samples_per_second": 62.312,
486
+ "eval_steps_per_second": 7.819,
487
+ "step": 2205
488
+ },
489
+ {
490
+ "epoch": 46.0,
491
+ "eval_accuracy": 0.9056420233463035,
492
+ "eval_loss": 0.7135947942733765,
493
+ "eval_model_preparation_time": 0.0053,
494
+ "eval_runtime": 16.7331,
495
+ "eval_samples_per_second": 61.435,
496
+ "eval_steps_per_second": 7.709,
497
+ "step": 2254
498
+ },
499
+ {
500
+ "epoch": 47.0,
501
+ "eval_accuracy": 0.9085603112840467,
502
+ "eval_loss": 0.7030473351478577,
503
+ "eval_model_preparation_time": 0.0053,
504
+ "eval_runtime": 16.265,
505
+ "eval_samples_per_second": 63.203,
506
+ "eval_steps_per_second": 7.931,
507
+ "step": 2303
508
+ },
509
+ {
510
+ "epoch": 48.0,
511
+ "eval_accuracy": 0.9066147859922179,
512
+ "eval_loss": 0.694833517074585,
513
+ "eval_model_preparation_time": 0.0053,
514
+ "eval_runtime": 16.6017,
515
+ "eval_samples_per_second": 61.921,
516
+ "eval_steps_per_second": 7.77,
517
+ "step": 2352
518
+ },
519
+ {
520
+ "epoch": 49.0,
521
+ "eval_accuracy": 0.9075875486381323,
522
+ "eval_loss": 0.6868679523468018,
523
+ "eval_model_preparation_time": 0.0053,
524
+ "eval_runtime": 16.3769,
525
+ "eval_samples_per_second": 62.771,
526
+ "eval_steps_per_second": 7.877,
527
+ "step": 2401
528
+ },
529
+ {
530
+ "epoch": 50.0,
531
+ "eval_accuracy": 0.9105058365758755,
532
+ "eval_loss": 0.6781201958656311,
533
+ "eval_model_preparation_time": 0.0053,
534
+ "eval_runtime": 16.5365,
535
+ "eval_samples_per_second": 62.166,
536
+ "eval_steps_per_second": 7.801,
537
+ "step": 2450
538
+ },
539
+ {
540
+ "epoch": 51.0,
541
+ "eval_accuracy": 0.9114785992217899,
542
+ "eval_loss": 0.6707582473754883,
543
+ "eval_model_preparation_time": 0.0053,
544
+ "eval_runtime": 16.7029,
545
+ "eval_samples_per_second": 61.546,
546
+ "eval_steps_per_second": 7.723,
547
+ "step": 2499
548
+ },
549
+ {
550
+ "epoch": 51.02040816326531,
551
+ "grad_norm": 1.5139410495758057,
552
+ "learning_rate": 4.948453608247422e-07,
553
+ "loss": 0.6135,
554
+ "step": 2500
555
+ },
556
+ {
557
+ "epoch": 52.0,
558
+ "eval_accuracy": 0.914396887159533,
559
+ "eval_loss": 0.6635868549346924,
560
+ "eval_model_preparation_time": 0.0053,
561
+ "eval_runtime": 16.7917,
562
+ "eval_samples_per_second": 61.221,
563
+ "eval_steps_per_second": 7.682,
564
+ "step": 2548
565
+ },
566
+ {
567
+ "epoch": 53.0,
568
+ "eval_accuracy": 0.9134241245136187,
569
+ "eval_loss": 0.6572186946868896,
570
+ "eval_model_preparation_time": 0.0053,
571
+ "eval_runtime": 16.5427,
572
+ "eval_samples_per_second": 62.142,
573
+ "eval_steps_per_second": 7.798,
574
+ "step": 2597
575
+ },
576
+ {
577
+ "epoch": 54.0,
578
+ "eval_accuracy": 0.9153696498054474,
579
+ "eval_loss": 0.6508779525756836,
580
+ "eval_model_preparation_time": 0.0053,
581
+ "eval_runtime": 16.3209,
582
+ "eval_samples_per_second": 62.987,
583
+ "eval_steps_per_second": 7.904,
584
+ "step": 2646
585
+ },
586
+ {
587
+ "epoch": 55.0,
588
+ "eval_accuracy": 0.9173151750972762,
589
+ "eval_loss": 0.6439871191978455,
590
+ "eval_model_preparation_time": 0.0053,
591
+ "eval_runtime": 16.5127,
592
+ "eval_samples_per_second": 62.255,
593
+ "eval_steps_per_second": 7.812,
594
+ "step": 2695
595
+ },
596
+ {
597
+ "epoch": 56.0,
598
+ "eval_accuracy": 0.9153696498054474,
599
+ "eval_loss": 0.6389443874359131,
600
+ "eval_model_preparation_time": 0.0053,
601
+ "eval_runtime": 16.1819,
602
+ "eval_samples_per_second": 63.528,
603
+ "eval_steps_per_second": 7.972,
604
+ "step": 2744
605
+ },
606
+ {
607
+ "epoch": 57.0,
608
+ "eval_accuracy": 0.919260700389105,
609
+ "eval_loss": 0.6327943801879883,
610
+ "eval_model_preparation_time": 0.0053,
611
+ "eval_runtime": 16.6521,
612
+ "eval_samples_per_second": 61.734,
613
+ "eval_steps_per_second": 7.747,
614
+ "step": 2793
615
+ },
616
+ {
617
+ "epoch": 58.0,
618
+ "eval_accuracy": 0.9212062256809338,
619
+ "eval_loss": 0.6252166032791138,
620
+ "eval_model_preparation_time": 0.0053,
621
+ "eval_runtime": 16.374,
622
+ "eval_samples_per_second": 62.782,
623
+ "eval_steps_per_second": 7.878,
624
+ "step": 2842
625
+ },
626
+ {
627
+ "epoch": 59.0,
628
+ "eval_accuracy": 0.9202334630350194,
629
+ "eval_loss": 0.6206849217414856,
630
+ "eval_model_preparation_time": 0.0053,
631
+ "eval_runtime": 16.2753,
632
+ "eval_samples_per_second": 63.163,
633
+ "eval_steps_per_second": 7.926,
634
+ "step": 2891
635
+ },
636
+ {
637
+ "epoch": 60.0,
638
+ "eval_accuracy": 0.919260700389105,
639
+ "eval_loss": 0.6157888174057007,
640
+ "eval_model_preparation_time": 0.0053,
641
+ "eval_runtime": 16.4343,
642
+ "eval_samples_per_second": 62.552,
643
+ "eval_steps_per_second": 7.849,
644
+ "step": 2940
645
+ },
646
+ {
647
+ "epoch": 61.0,
648
+ "eval_accuracy": 0.9221789883268483,
649
+ "eval_loss": 0.6105452179908752,
650
+ "eval_model_preparation_time": 0.0053,
651
+ "eval_runtime": 16.3327,
652
+ "eval_samples_per_second": 62.941,
653
+ "eval_steps_per_second": 7.898,
654
+ "step": 2989
655
+ },
656
+ {
657
+ "epoch": 61.224489795918366,
658
+ "grad_norm": 2.1964964866638184,
659
+ "learning_rate": 3.917525773195876e-07,
660
+ "loss": 0.5351,
661
+ "step": 3000
662
+ },
663
+ {
664
+ "epoch": 62.0,
665
+ "eval_accuracy": 0.9221789883268483,
666
+ "eval_loss": 0.6059720516204834,
667
+ "eval_model_preparation_time": 0.0053,
668
+ "eval_runtime": 15.9025,
669
+ "eval_samples_per_second": 64.644,
670
+ "eval_steps_per_second": 8.112,
671
+ "step": 3038
672
+ },
673
+ {
674
+ "epoch": 63.0,
675
+ "eval_accuracy": 0.9221789883268483,
676
+ "eval_loss": 0.6012532114982605,
677
+ "eval_model_preparation_time": 0.0053,
678
+ "eval_runtime": 16.1752,
679
+ "eval_samples_per_second": 63.554,
680
+ "eval_steps_per_second": 7.975,
681
+ "step": 3087
682
+ },
683
+ {
684
+ "epoch": 64.0,
685
+ "eval_accuracy": 0.9241245136186771,
686
+ "eval_loss": 0.5974491238594055,
687
+ "eval_model_preparation_time": 0.0053,
688
+ "eval_runtime": 16.006,
689
+ "eval_samples_per_second": 64.226,
690
+ "eval_steps_per_second": 8.059,
691
+ "step": 3136
692
+ },
693
+ {
694
+ "epoch": 65.0,
695
+ "eval_accuracy": 0.9250972762645915,
696
+ "eval_loss": 0.5935858488082886,
697
+ "eval_model_preparation_time": 0.0053,
698
+ "eval_runtime": 15.9581,
699
+ "eval_samples_per_second": 64.419,
700
+ "eval_steps_per_second": 8.084,
701
+ "step": 3185
702
+ },
703
+ {
704
+ "epoch": 66.0,
705
+ "eval_accuracy": 0.9260700389105059,
706
+ "eval_loss": 0.588865339756012,
707
+ "eval_model_preparation_time": 0.0053,
708
+ "eval_runtime": 16.1822,
709
+ "eval_samples_per_second": 63.526,
710
+ "eval_steps_per_second": 7.972,
711
+ "step": 3234
712
+ },
713
+ {
714
+ "epoch": 67.0,
715
+ "eval_accuracy": 0.9260700389105059,
716
+ "eval_loss": 0.5851794481277466,
717
+ "eval_model_preparation_time": 0.0053,
718
+ "eval_runtime": 16.116,
719
+ "eval_samples_per_second": 63.787,
720
+ "eval_steps_per_second": 8.004,
721
+ "step": 3283
722
+ },
723
+ {
724
+ "epoch": 68.0,
725
+ "eval_accuracy": 0.9221789883268483,
726
+ "eval_loss": 0.5818396210670471,
727
+ "eval_model_preparation_time": 0.0053,
728
+ "eval_runtime": 16.5601,
729
+ "eval_samples_per_second": 62.077,
730
+ "eval_steps_per_second": 7.79,
731
+ "step": 3332
732
+ },
733
+ {
734
+ "epoch": 69.0,
735
+ "eval_accuracy": 0.9231517509727627,
736
+ "eval_loss": 0.578502357006073,
737
+ "eval_model_preparation_time": 0.0053,
738
+ "eval_runtime": 16.364,
739
+ "eval_samples_per_second": 62.821,
740
+ "eval_steps_per_second": 7.883,
741
+ "step": 3381
742
+ },
743
+ {
744
+ "epoch": 70.0,
745
+ "eval_accuracy": 0.9241245136186771,
746
+ "eval_loss": 0.5750812888145447,
747
+ "eval_model_preparation_time": 0.0053,
748
+ "eval_runtime": 16.3061,
749
+ "eval_samples_per_second": 63.044,
750
+ "eval_steps_per_second": 7.911,
751
+ "step": 3430
752
+ },
753
+ {
754
+ "epoch": 71.0,
755
+ "eval_accuracy": 0.9241245136186771,
756
+ "eval_loss": 0.5718052387237549,
757
+ "eval_model_preparation_time": 0.0053,
758
+ "eval_runtime": 16.3828,
759
+ "eval_samples_per_second": 62.749,
760
+ "eval_steps_per_second": 7.874,
761
+ "step": 3479
762
+ },
763
+ {
764
+ "epoch": 71.42857142857143,
765
+ "grad_norm": 1.5724107027053833,
766
+ "learning_rate": 2.8865979381443296e-07,
767
+ "loss": 0.483,
768
+ "step": 3500
769
+ },
770
+ {
771
+ "epoch": 72.0,
772
+ "eval_accuracy": 0.9250972762645915,
773
+ "eval_loss": 0.5689519643783569,
774
+ "eval_model_preparation_time": 0.0053,
775
+ "eval_runtime": 16.3228,
776
+ "eval_samples_per_second": 62.98,
777
+ "eval_steps_per_second": 7.903,
778
+ "step": 3528
779
+ },
780
+ {
781
+ "epoch": 73.0,
782
+ "eval_accuracy": 0.9241245136186771,
783
+ "eval_loss": 0.565991997718811,
784
+ "eval_model_preparation_time": 0.0053,
785
+ "eval_runtime": 16.0251,
786
+ "eval_samples_per_second": 64.149,
787
+ "eval_steps_per_second": 8.05,
788
+ "step": 3577
789
+ },
790
+ {
791
+ "epoch": 74.0,
792
+ "eval_accuracy": 0.9231517509727627,
793
+ "eval_loss": 0.5631707906723022,
794
+ "eval_model_preparation_time": 0.0053,
795
+ "eval_runtime": 16.0794,
796
+ "eval_samples_per_second": 63.933,
797
+ "eval_steps_per_second": 8.023,
798
+ "step": 3626
799
+ },
800
+ {
801
+ "epoch": 75.0,
802
+ "eval_accuracy": 0.9250972762645915,
803
+ "eval_loss": 0.5604016780853271,
804
+ "eval_model_preparation_time": 0.0053,
805
+ "eval_runtime": 16.1767,
806
+ "eval_samples_per_second": 63.548,
807
+ "eval_steps_per_second": 7.974,
808
+ "step": 3675
809
+ },
810
+ {
811
+ "epoch": 76.0,
812
+ "eval_accuracy": 0.9241245136186771,
813
+ "eval_loss": 0.5577030777931213,
814
+ "eval_model_preparation_time": 0.0053,
815
+ "eval_runtime": 16.2849,
816
+ "eval_samples_per_second": 63.126,
817
+ "eval_steps_per_second": 7.921,
818
+ "step": 3724
819
+ },
820
+ {
821
+ "epoch": 77.0,
822
+ "eval_accuracy": 0.9250972762645915,
823
+ "eval_loss": 0.5554907917976379,
824
+ "eval_model_preparation_time": 0.0053,
825
+ "eval_runtime": 16.1225,
826
+ "eval_samples_per_second": 63.762,
827
+ "eval_steps_per_second": 8.001,
828
+ "step": 3773
829
+ },
830
+ {
831
+ "epoch": 78.0,
832
+ "eval_accuracy": 0.9260700389105059,
833
+ "eval_loss": 0.5532063841819763,
834
+ "eval_model_preparation_time": 0.0053,
835
+ "eval_runtime": 16.1489,
836
+ "eval_samples_per_second": 63.658,
837
+ "eval_steps_per_second": 7.988,
838
+ "step": 3822
839
+ },
840
+ {
841
+ "epoch": 79.0,
842
+ "eval_accuracy": 0.9241245136186771,
843
+ "eval_loss": 0.5517746806144714,
844
+ "eval_model_preparation_time": 0.0053,
845
+ "eval_runtime": 16.1776,
846
+ "eval_samples_per_second": 63.545,
847
+ "eval_steps_per_second": 7.974,
848
+ "step": 3871
849
+ },
850
+ {
851
+ "epoch": 80.0,
852
+ "eval_accuracy": 0.9260700389105059,
853
+ "eval_loss": 0.5491300821304321,
854
+ "eval_model_preparation_time": 0.0053,
855
+ "eval_runtime": 16.4646,
856
+ "eval_samples_per_second": 62.437,
857
+ "eval_steps_per_second": 7.835,
858
+ "step": 3920
859
+ },
860
+ {
861
+ "epoch": 81.0,
862
+ "eval_accuracy": 0.9260700389105059,
863
+ "eval_loss": 0.5468857884407043,
864
+ "eval_model_preparation_time": 0.0053,
865
+ "eval_runtime": 16.5897,
866
+ "eval_samples_per_second": 61.966,
867
+ "eval_steps_per_second": 7.776,
868
+ "step": 3969
869
+ },
870
+ {
871
+ "epoch": 81.63265306122449,
872
+ "grad_norm": 1.5352323055267334,
873
+ "learning_rate": 1.8556701030927835e-07,
874
+ "loss": 0.4496,
875
+ "step": 4000
876
+ },
877
+ {
878
+ "epoch": 82.0,
879
+ "eval_accuracy": 0.9260700389105059,
880
+ "eval_loss": 0.5453611016273499,
881
+ "eval_model_preparation_time": 0.0053,
882
+ "eval_runtime": 16.5158,
883
+ "eval_samples_per_second": 62.243,
884
+ "eval_steps_per_second": 7.811,
885
+ "step": 4018
886
+ },
887
+ {
888
+ "epoch": 83.0,
889
+ "eval_accuracy": 0.9289883268482491,
890
+ "eval_loss": 0.5431452989578247,
891
+ "eval_model_preparation_time": 0.0053,
892
+ "eval_runtime": 16.2358,
893
+ "eval_samples_per_second": 63.317,
894
+ "eval_steps_per_second": 7.945,
895
+ "step": 4067
896
+ },
897
+ {
898
+ "epoch": 84.0,
899
+ "eval_accuracy": 0.9270428015564203,
900
+ "eval_loss": 0.5419163107872009,
901
+ "eval_model_preparation_time": 0.0053,
902
+ "eval_runtime": 16.3758,
903
+ "eval_samples_per_second": 62.776,
904
+ "eval_steps_per_second": 7.877,
905
+ "step": 4116
906
+ },
907
+ {
908
+ "epoch": 85.0,
909
+ "eval_accuracy": 0.9280155642023347,
910
+ "eval_loss": 0.5407156348228455,
911
+ "eval_model_preparation_time": 0.0053,
912
+ "eval_runtime": 16.7946,
913
+ "eval_samples_per_second": 61.21,
914
+ "eval_steps_per_second": 7.681,
915
+ "step": 4165
916
+ },
917
+ {
918
+ "epoch": 86.0,
919
+ "eval_accuracy": 0.9280155642023347,
920
+ "eval_loss": 0.5391236543655396,
921
+ "eval_model_preparation_time": 0.0053,
922
+ "eval_runtime": 16.961,
923
+ "eval_samples_per_second": 60.609,
924
+ "eval_steps_per_second": 7.606,
925
+ "step": 4214
926
+ },
927
+ {
928
+ "epoch": 87.0,
929
+ "eval_accuracy": 0.9280155642023347,
930
+ "eval_loss": 0.5377916693687439,
931
+ "eval_model_preparation_time": 0.0053,
932
+ "eval_runtime": 16.9464,
933
+ "eval_samples_per_second": 60.662,
934
+ "eval_steps_per_second": 7.612,
935
+ "step": 4263
936
+ },
937
+ {
938
+ "epoch": 88.0,
939
+ "eval_accuracy": 0.9280155642023347,
940
+ "eval_loss": 0.5364987850189209,
941
+ "eval_model_preparation_time": 0.0053,
942
+ "eval_runtime": 16.757,
943
+ "eval_samples_per_second": 61.347,
944
+ "eval_steps_per_second": 7.698,
945
+ "step": 4312
946
+ },
947
+ {
948
+ "epoch": 89.0,
949
+ "eval_accuracy": 0.9280155642023347,
950
+ "eval_loss": 0.5354925394058228,
951
+ "eval_model_preparation_time": 0.0053,
952
+ "eval_runtime": 16.8288,
953
+ "eval_samples_per_second": 61.086,
954
+ "eval_steps_per_second": 7.665,
955
+ "step": 4361
956
+ },
957
+ {
958
+ "epoch": 90.0,
959
+ "eval_accuracy": 0.9270428015564203,
960
+ "eval_loss": 0.534569501876831,
961
+ "eval_model_preparation_time": 0.0053,
962
+ "eval_runtime": 16.5639,
963
+ "eval_samples_per_second": 62.063,
964
+ "eval_steps_per_second": 7.788,
965
+ "step": 4410
966
+ },
967
+ {
968
+ "epoch": 91.0,
969
+ "eval_accuracy": 0.9280155642023347,
970
+ "eval_loss": 0.5335658192634583,
971
+ "eval_model_preparation_time": 0.0053,
972
+ "eval_runtime": 16.5932,
973
+ "eval_samples_per_second": 61.953,
974
+ "eval_steps_per_second": 7.774,
975
+ "step": 4459
976
+ },
977
+ {
978
+ "epoch": 91.83673469387755,
979
+ "grad_norm": 1.4989055395126343,
980
+ "learning_rate": 8.24742268041237e-08,
981
+ "loss": 0.4299,
982
+ "step": 4500
983
+ },
984
+ {
985
+ "epoch": 92.0,
986
+ "eval_accuracy": 0.9280155642023347,
987
+ "eval_loss": 0.5327633023262024,
988
+ "eval_model_preparation_time": 0.0053,
989
+ "eval_runtime": 16.6218,
990
+ "eval_samples_per_second": 61.846,
991
+ "eval_steps_per_second": 7.761,
992
+ "step": 4508
993
+ },
994
+ {
995
+ "epoch": 93.0,
996
+ "eval_accuracy": 0.9280155642023347,
997
+ "eval_loss": 0.5321099758148193,
998
+ "eval_model_preparation_time": 0.0053,
999
+ "eval_runtime": 16.4686,
1000
+ "eval_samples_per_second": 62.422,
1001
+ "eval_steps_per_second": 7.833,
1002
+ "step": 4557
1003
+ },
1004
+ {
1005
+ "epoch": 94.0,
1006
+ "eval_accuracy": 0.9280155642023347,
1007
+ "eval_loss": 0.5314825177192688,
1008
+ "eval_model_preparation_time": 0.0053,
1009
+ "eval_runtime": 16.4592,
1010
+ "eval_samples_per_second": 62.458,
1011
+ "eval_steps_per_second": 7.838,
1012
+ "step": 4606
1013
+ },
1014
+ {
1015
+ "epoch": 95.0,
1016
+ "eval_accuracy": 0.9270428015564203,
1017
+ "eval_loss": 0.5310559868812561,
1018
+ "eval_model_preparation_time": 0.0053,
1019
+ "eval_runtime": 16.6737,
1020
+ "eval_samples_per_second": 61.654,
1021
+ "eval_steps_per_second": 7.737,
1022
+ "step": 4655
1023
+ },
1024
+ {
1025
+ "epoch": 96.0,
1026
+ "eval_accuracy": 0.9270428015564203,
1027
+ "eval_loss": 0.5306681990623474,
1028
+ "eval_model_preparation_time": 0.0053,
1029
+ "eval_runtime": 16.4895,
1030
+ "eval_samples_per_second": 62.343,
1031
+ "eval_steps_per_second": 7.823,
1032
+ "step": 4704
1033
+ },
1034
+ {
1035
+ "epoch": 97.0,
1036
+ "eval_accuracy": 0.9280155642023347,
1037
+ "eval_loss": 0.5302944183349609,
1038
+ "eval_model_preparation_time": 0.0053,
1039
+ "eval_runtime": 16.5288,
1040
+ "eval_samples_per_second": 62.194,
1041
+ "eval_steps_per_second": 7.805,
1042
+ "step": 4753
1043
+ },
1044
+ {
1045
+ "epoch": 98.0,
1046
+ "eval_accuracy": 0.9270428015564203,
1047
+ "eval_loss": 0.5300623774528503,
1048
+ "eval_model_preparation_time": 0.0053,
1049
+ "eval_runtime": 16.4477,
1050
+ "eval_samples_per_second": 62.501,
1051
+ "eval_steps_per_second": 7.843,
1052
+ "step": 4802
1053
+ },
1054
+ {
1055
+ "epoch": 99.0,
1056
+ "eval_accuracy": 0.9280155642023347,
1057
+ "eval_loss": 0.5299115180969238,
1058
+ "eval_model_preparation_time": 0.0053,
1059
+ "eval_runtime": 16.5643,
1060
+ "eval_samples_per_second": 62.061,
1061
+ "eval_steps_per_second": 7.788,
1062
+ "step": 4851
1063
+ },
1064
+ {
1065
+ "epoch": 100.0,
1066
+ "eval_accuracy": 0.9280155642023347,
1067
+ "eval_loss": 0.5298713445663452,
1068
+ "eval_model_preparation_time": 0.0053,
1069
+ "eval_runtime": 16.9421,
1070
+ "eval_samples_per_second": 60.677,
1071
+ "eval_steps_per_second": 7.614,
1072
+ "step": 4900
1073
+ }
1074
+ ],
1075
+ "logging_steps": 500,
1076
+ "max_steps": 4900,
1077
+ "num_input_tokens_seen": 0,
1078
+ "num_train_epochs": 100,
1079
+ "save_steps": 500,
1080
+ "stateful_callbacks": {
1081
+ "TrainerControl": {
1082
+ "args": {
1083
+ "should_epoch_stop": false,
1084
+ "should_evaluate": false,
1085
+ "should_log": false,
1086
+ "should_save": true,
1087
+ "should_training_stop": true
1088
+ },
1089
+ "attributes": {}
1090
+ }
1091
+ },
1092
+ "total_flos": 1.1949586091012506e+19,
1093
+ "train_batch_size": 32,
1094
+ "trial_name": null,
1095
+ "trial_params": null
1096
+ }
checkpoint-4900/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858525d77a2aca146b3415e12c5a888e5f5b65064bb27a757b2697e4eabc4024
3
+ size 5176
config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "curly",
13
+ "1": "dreadlocks",
14
+ "2": "kinky",
15
+ "3": "straight",
16
+ "4": "wavy"
17
+ },
18
+ "image_size": 224,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "curly": 0,
23
+ "dreadlocks": 1,
24
+ "kinky": 2,
25
+ "straight": 3,
26
+ "wavy": 4
27
+ },
28
+ "layer_norm_eps": 1e-12,
29
+ "model_type": "vit",
30
+ "num_attention_heads": 12,
31
+ "num_channels": 3,
32
+ "num_hidden_layers": 12,
33
+ "patch_size": 16,
34
+ "problem_type": "single_label_classification",
35
+ "qkv_bias": true,
36
+ "torch_dtype": "float32",
37
+ "transformers_version": "4.45.2"
38
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:880839388dee970fc3c9a41a9ea13295c6093fea1653d77780af5a05743c9cf2
3
+ size 343233204
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:858525d77a2aca146b3415e12c5a888e5f5b65064bb27a757b2697e4eabc4024
3
+ size 5176