viahes committed on
Commit 92d2dcb
1 Parent(s): db6a9e5

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "_name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
+ "adapters": {
+ "adapters": {},
+ "config_map": {},
+ "fusion_config_map": {},
+ "fusions": {}
+ },
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.12254664017551167,
+ "classifier_dropout": null,
+ "gradient_checkpointing": false,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.09767216663102402,
+ "hidden_size": 768,
+ "id2label": {
+ "0": "right",
+ "1": "left"
+ },
+ "initializer_range": 0.02,
+ "intermediate_size": 3072,
+ "label2id": {
+ "left": 1,
+ "right": 0
+ },
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "torch_dtype": "float32",
+ "transformers_version": "4.26.1",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 31002
+ }
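
The configuration above describes a cased Spanish BERT (BETO) fine-tuned as a binary sequence classifier, with id2label mapping 0 to "right" and 1 to "left". As a minimal sketch (not part of the commit), this is how such a checkpoint could be loaded for inference with transformers; the checkpoint path below is a hypothetical placeholder, not the actual repo id of this upload:

```python
# Sketch only: load the uploaded checkpoint for binary ideology classification.
# "path/to/this-checkpoint" is a hypothetical placeholder for wherever these
# files live (a local folder or a Hub repo id).
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

checkpoint = "path/to/this-checkpoint"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
model.eval()

inputs = tokenizer(
    "Texto de ejemplo en español",  # example input, truncated to the 512-token limit
    return_tensors="pt",
    truncation=True,
    max_length=512,
)
with torch.no_grad():
    logits = model(**inputs).logits

# id2label comes from config.json: 0 -> "right", 1 -> "left"
print(model.config.id2label[logits.argmax(dim=-1).item()])
```
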
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b0d42934c6adcc6932fde9dde10c84e023f4f777488be65d7d6e55247290989
+ size 878937221
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c78038019e0564f6ed9a02b54d51c1ba25a1d7a14ee96dc53bbecdf47a82e835
+ size 439482485
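
The binary files in this commit are stored as Git LFS pointers: each pointer records only the object's SHA-256 ("oid") and its size in bytes, while the actual payload is fetched by LFS. A small, hedged sketch of how a downloaded pytorch_model.bin could be checked against the pointer shown above (the local path is an assumption):

```python
# Sketch only: verify a locally downloaded pytorch_model.bin against the
# oid/size recorded in the LFS pointer above.
import hashlib
import os

EXPECTED_OID = "c78038019e0564f6ed9a02b54d51c1ba25a1d7a14ee96dc53bbecdf47a82e835"
EXPECTED_SIZE = 439482485
path = "pytorch_model.bin"  # assumed local copy of the resolved LFS object

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size does not match the pointer"
assert digest.hexdigest() == EXPECTED_OID, "sha256 does not match the pointer"
print("pytorch_model.bin matches its LFS pointer")
```
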
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48b43e5d32e32af08a967d6b00b55ac9de2566e8afe2dee53092b67bdda7e824
+ size 14575
scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b074d794dc78d226b73c1c5bd5c2de265ec253feb15eebde071c24b606777f92
+ size 557
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c1ad948fe02295e84ba3f25207af9671d9455bcbfa0ef083e143204df91d9429
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "mask_token": "[MASK]",
+ "model_max_length": 512,
+ "name_or_path": "dccuchile/bert-base-spanish-wwm-cased",
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "special_tokens_map_file": "/home/lhurtado/.cache/huggingface/hub/models--dccuchile--bert-base-spanish-wwm-cased/snapshots/56a7647b957a4230fc3f80dafbe80f2ba9b0de73/special_tokens_map.json",
+ "strip_accents": false,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "BertTokenizer",
+ "unk_token": "[UNK]"
+ }
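
tokenizer_config.json keeps the base tokenizer's cased behaviour (do_lower_case: false, strip_accents: false) with a 512-token model_max_length. A brief sketch (assuming only the public base tokenizer dccuchile/bert-base-spanish-wwm-cased) of what that implies for Spanish input:

```python
# Sketch only: with do_lower_case=false and strip_accents=false, capitalisation
# and accented characters are preserved by the WordPiece tokenizer.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-cased")
print(tokenizer.tokenize("Política Española"))  # subwords keep the capitals and accents
```
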
trainer_state.json ADDED
@@ -0,0 +1,400 @@
+ {
+ "best_metric": 0.9818782216494845,
+ "best_model_checkpoint": "../models/PoliticES/ideology_binary/BETO/450_SEED_11/run-1/checkpoint-3180",
+ "epoch": 14.99647473560517,
+ "global_step": 3180,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.94,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.4738,
+ "step": 200
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7511111111111111,
+ "eval_jaccard": 0.5818173817381738,
+ "eval_loss": 0.56746506690979,
+ "eval_macro_f1": 0.716113914924297,
+ "eval_macro_precision": 0.8270380109968922,
+ "eval_macro_recall": 0.7193362193362193,
+ "eval_micro_f1": 0.7511111111111111,
+ "eval_micro_precision": 0.7511111111111111,
+ "eval_micro_recall": 0.7511111111111111,
+ "eval_runtime": 3.8883,
+ "eval_samples_per_second": 387.83,
+ "eval_steps_per_second": 12.345,
+ "eval_weighted_f1": 0.7280749819754866,
+ "eval_weighted_precision": 0.8113889552952426,
+ "eval_weighted_recall": 0.7511111111111111,
+ "step": 212
+ },
+ {
+ "epoch": 1.88,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.3003,
+ "step": 400
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.9422222222222222,
+ "eval_jaccard": 0.8905400934915497,
+ "eval_loss": 0.2769545614719391,
+ "eval_macro_f1": 0.9410294147295417,
+ "eval_macro_precision": 0.9443722592171513,
+ "eval_macro_recall": 0.9386724386724387,
+ "eval_micro_f1": 0.9422222222222222,
+ "eval_micro_precision": 0.9422222222222222,
+ "eval_micro_recall": 0.9422222222222222,
+ "eval_runtime": 3.8935,
+ "eval_samples_per_second": 387.316,
+ "eval_steps_per_second": 12.328,
+ "eval_weighted_f1": 0.9420358460514909,
+ "eval_weighted_precision": 0.9428033133019327,
+ "eval_weighted_recall": 0.9422222222222222,
+ "step": 424
+ },
+ {
+ "epoch": 2.83,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.1564,
+ "step": 600
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.9333333333333333,
+ "eval_jaccard": 0.8754464285714286,
+ "eval_loss": 0.3916676938533783,
+ "eval_macro_f1": 0.9329945005856777,
+ "eval_macro_precision": 0.9322134387351779,
+ "eval_macro_recall": 0.9383116883116883,
+ "eval_micro_f1": 0.9333333333333333,
+ "eval_micro_precision": 0.9333333333333333,
+ "eval_micro_recall": 0.9333333333333333,
+ "eval_runtime": 3.9044,
+ "eval_samples_per_second": 386.23,
+ "eval_steps_per_second": 12.294,
+ "eval_weighted_f1": 0.9335662808473466,
+ "eval_weighted_precision": 0.9382608695652174,
+ "eval_weighted_recall": 0.9333333333333333,
+ "step": 636
+ },
+ {
+ "epoch": 3.77,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0897,
+ "step": 800
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.9422222222222222,
+ "eval_jaccard": 0.8902276505022765,
+ "eval_loss": 0.4636266231536865,
+ "eval_macro_f1": 0.9407042510490785,
+ "eval_macro_precision": 0.9481481481481482,
+ "eval_macro_recall": 0.9365079365079365,
+ "eval_micro_f1": 0.9422222222222222,
+ "eval_micro_precision": 0.9422222222222222,
+ "eval_micro_recall": 0.9422222222222222,
+ "eval_runtime": 3.8881,
+ "eval_samples_per_second": 387.847,
+ "eval_steps_per_second": 12.345,
+ "eval_weighted_f1": 0.9418427294289362,
+ "eval_weighted_precision": 0.9445925925925925,
+ "eval_weighted_recall": 0.9422222222222222,
+ "step": 848
+ },
+ {
+ "epoch": 4.71,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0562,
+ "step": 1000
+ },
+ {
+ "epoch": 5.0,
+ "eval_accuracy": 0.9422222222222222,
+ "eval_jaccard": 0.8902276505022765,
+ "eval_loss": 0.5073122978210449,
+ "eval_macro_f1": 0.9407042510490785,
+ "eval_macro_precision": 0.9481481481481482,
+ "eval_macro_recall": 0.9365079365079365,
+ "eval_micro_f1": 0.9422222222222222,
+ "eval_micro_precision": 0.9422222222222222,
+ "eval_micro_recall": 0.9422222222222222,
+ "eval_runtime": 3.9083,
+ "eval_samples_per_second": 385.848,
+ "eval_steps_per_second": 12.282,
+ "eval_weighted_f1": 0.9418427294289362,
+ "eval_weighted_precision": 0.9445925925925925,
+ "eval_weighted_recall": 0.9422222222222222,
+ "step": 1060
+ },
+ {
+ "epoch": 5.66,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0804,
+ "step": 1200
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.96,
+ "eval_jaccard": 0.9233056148355289,
+ "eval_loss": 0.6428035497665405,
+ "eval_macro_f1": 0.9596814335490294,
+ "eval_macro_precision": 0.9580624702711273,
+ "eval_macro_recall": 0.9632034632034632,
+ "eval_micro_f1": 0.96,
+ "eval_micro_precision": 0.96,
+ "eval_micro_recall": 0.96,
+ "eval_runtime": 3.9157,
+ "eval_samples_per_second": 385.112,
+ "eval_steps_per_second": 12.258,
+ "eval_weighted_f1": 0.9601114982578397,
+ "eval_weighted_precision": 0.9620865704772475,
+ "eval_weighted_recall": 0.96,
+ "step": 1272
+ },
+ {
+ "epoch": 6.6,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0179,
+ "step": 1400
+ },
+ {
+ "epoch": 7.0,
+ "eval_accuracy": 0.9555555555555556,
+ "eval_jaccard": 0.9148573109738158,
+ "eval_loss": 0.630973756313324,
+ "eval_macro_f1": 0.9548047565482886,
+ "eval_macro_precision": 0.9559439432989691,
+ "eval_macro_recall": 0.9538239538239539,
+ "eval_micro_f1": 0.9555555555555556,
+ "eval_micro_precision": 0.9555555555555556,
+ "eval_micro_recall": 0.9555555555555556,
+ "eval_runtime": 3.9129,
+ "eval_samples_per_second": 385.393,
+ "eval_steps_per_second": 12.267,
+ "eval_weighted_f1": 0.9555037763136751,
+ "eval_weighted_precision": 0.9556056701030928,
+ "eval_weighted_recall": 0.9555555555555556,
+ "step": 1484
+ },
+ {
+ "epoch": 7.55,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0454,
+ "step": 1600
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.9644444444444444,
+ "eval_jaccard": 0.9312917227959886,
+ "eval_loss": 0.5245578289031982,
+ "eval_macro_f1": 0.9638438052386309,
+ "eval_macro_precision": 0.9650048324742269,
+ "eval_macro_recall": 0.9628427128427128,
+ "eval_micro_f1": 0.9644444444444444,
+ "eval_micro_precision": 0.9644444444444444,
+ "eval_micro_recall": 0.9644444444444444,
+ "eval_runtime": 3.9143,
+ "eval_samples_per_second": 385.257,
+ "eval_steps_per_second": 12.263,
+ "eval_weighted_f1": 0.9644030210509401,
+ "eval_weighted_precision": 0.9645167525773195,
+ "eval_weighted_recall": 0.9644444444444444,
+ "step": 1696
+ },
+ {
+ "epoch": 8.49,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0276,
+ "step": 1800
+ },
+ {
+ "epoch": 9.0,
+ "eval_accuracy": 0.9511111111111111,
+ "eval_jaccard": 0.9064495782911624,
+ "eval_loss": 0.645963728427887,
+ "eval_macro_f1": 0.9499686672461541,
+ "eval_macro_precision": 0.9552958483164433,
+ "eval_macro_recall": 0.9466089466089466,
+ "eval_micro_f1": 0.9511111111111111,
+ "eval_micro_precision": 0.9511111111111111,
+ "eval_micro_recall": 0.9511111111111111,
+ "eval_runtime": 3.8963,
+ "eval_samples_per_second": 387.03,
+ "eval_steps_per_second": 12.319,
+ "eval_weighted_f1": 0.9508759020800905,
+ "eval_weighted_precision": 0.9525400457665903,
+ "eval_weighted_recall": 0.9511111111111111,
+ "step": 1908
+ },
+ {
+ "epoch": 9.43,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0244,
+ "step": 2000
+ },
+ {
+ "epoch": 10.0,
+ "eval_accuracy": 0.9688888888888889,
+ "eval_jaccard": 0.9395812863729122,
+ "eval_loss": 0.5576620697975159,
+ "eval_macro_f1": 0.9683257918552036,
+ "eval_macro_precision": 0.970203488372093,
+ "eval_macro_recall": 0.9668109668109668,
+ "eval_micro_f1": 0.9688888888888889,
+ "eval_micro_precision": 0.9688888888888889,
+ "eval_micro_recall": 0.9688888888888889,
+ "eval_runtime": 3.8888,
+ "eval_samples_per_second": 387.781,
+ "eval_steps_per_second": 12.343,
+ "eval_weighted_f1": 0.9688325791855203,
+ "eval_weighted_precision": 0.9691279069767442,
+ "eval_weighted_recall": 0.9688888888888889,
+ "step": 2120
+ },
+ {
+ "epoch": 10.38,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0144,
+ "step": 2200
+ },
+ {
+ "epoch": 11.0,
+ "eval_accuracy": 0.9688888888888889,
+ "eval_jaccard": 0.9397094904794161,
+ "eval_loss": 0.6369400024414062,
+ "eval_macro_f1": 0.9684678371939377,
+ "eval_macro_precision": 0.968,
+ "eval_macro_recall": 0.968975468975469,
+ "eval_micro_f1": 0.9688888888888889,
+ "eval_micro_precision": 0.9688888888888889,
+ "eval_micro_recall": 0.9688888888888889,
+ "eval_runtime": 3.8878,
+ "eval_samples_per_second": 387.876,
+ "eval_steps_per_second": 12.346,
+ "eval_weighted_f1": 0.9689050831848484,
+ "eval_weighted_precision": 0.9689599999999999,
+ "eval_weighted_recall": 0.9688888888888889,
+ "step": 2332
+ },
+ {
+ "epoch": 11.32,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0113,
+ "step": 2400
+ },
+ {
+ "epoch": 12.0,
+ "eval_accuracy": 0.9688888888888889,
+ "eval_jaccard": 0.9395812863729122,
+ "eval_loss": 0.6348240971565247,
+ "eval_macro_f1": 0.9683257918552036,
+ "eval_macro_precision": 0.970203488372093,
+ "eval_macro_recall": 0.9668109668109668,
+ "eval_micro_f1": 0.9688888888888889,
+ "eval_micro_precision": 0.9688888888888889,
+ "eval_micro_recall": 0.9688888888888889,
+ "eval_runtime": 3.8915,
+ "eval_samples_per_second": 387.511,
+ "eval_steps_per_second": 12.335,
+ "eval_weighted_f1": 0.9688325791855203,
+ "eval_weighted_precision": 0.9691279069767442,
+ "eval_weighted_recall": 0.9688888888888889,
+ "step": 2544
+ },
+ {
+ "epoch": 12.26,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.017,
+ "step": 2600
+ },
+ {
+ "epoch": 13.0,
+ "eval_accuracy": 0.9333333333333333,
+ "eval_jaccard": 0.8737588652482269,
+ "eval_loss": 0.6715770363807678,
+ "eval_macro_f1": 0.9309265058021734,
+ "eval_macro_precision": 0.9468085106382979,
+ "eval_macro_recall": 0.9242424242424243,
+ "eval_micro_f1": 0.9333333333333333,
+ "eval_micro_precision": 0.9333333333333333,
+ "eval_micro_recall": 0.9333333333333333,
+ "eval_runtime": 3.8914,
+ "eval_samples_per_second": 387.517,
+ "eval_steps_per_second": 12.335,
+ "eval_weighted_f1": 0.9324737520722048,
+ "eval_weighted_precision": 0.9404255319148935,
+ "eval_weighted_recall": 0.9333333333333333,
+ "step": 2756
+ },
+ {
+ "epoch": 13.21,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0162,
+ "step": 2800
+ },
+ {
+ "epoch": 14.0,
+ "eval_accuracy": 0.9555555555555556,
+ "eval_jaccard": 0.9146445987882369,
+ "eval_loss": 0.6302870512008667,
+ "eval_macro_f1": 0.954578488372093,
+ "eval_macro_precision": 0.9589442815249267,
+ "eval_macro_recall": 0.9516594516594516,
+ "eval_micro_f1": 0.9555555555555556,
+ "eval_micro_precision": 0.9555555555555556,
+ "eval_micro_recall": 0.9555555555555556,
+ "eval_runtime": 3.8937,
+ "eval_samples_per_second": 387.288,
+ "eval_steps_per_second": 12.327,
+ "eval_weighted_f1": 0.9553779069767442,
+ "eval_weighted_precision": 0.9565982404692083,
+ "eval_weighted_recall": 0.9555555555555556,
+ "step": 2968
+ },
+ {
+ "epoch": 14.15,
+ "learning_rate": 2.7172184226965135e-05,
+ "loss": 0.0134,
+ "step": 3000
+ },
+ {
+ "epoch": 15.0,
+ "eval_accuracy": 0.9822222222222222,
+ "eval_jaccard": 0.964991452991453,
+ "eval_loss": 0.6276100277900696,
+ "eval_macro_f1": 0.9818782216494845,
+ "eval_macro_precision": 0.9846153846153847,
+ "eval_macro_recall": 0.9797979797979798,
+ "eval_micro_f1": 0.9822222222222222,
+ "eval_micro_precision": 0.9822222222222222,
+ "eval_micro_recall": 0.9822222222222222,
+ "eval_runtime": 3.8919,
+ "eval_samples_per_second": 387.474,
+ "eval_steps_per_second": 12.333,
+ "eval_weighted_f1": 0.9821778350515463,
+ "eval_weighted_precision": 0.9827692307692307,
+ "eval_weighted_recall": 0.9822222222222222,
+ "step": 3180
+ }
+ ],
+ "max_steps": 4240,
+ "num_train_epochs": 20,
+ "total_flos": 4.724653668328427e+16,
+ "trial_name": null,
+ "trial_params": {
+ "attention_probs_dropout_prob": 0.12254664017551167,
+ "gradient_accumulation_steps": 4,
+ "hidden_dropout_prob": 0.09767216663102402,
+ "learning_rate": 2.7172184226965135e-05,
+ "lr_scheduler_type": "constant",
+ "num_train_epochs": 20,
+ "per_device_train_batch_size": 16,
+ "weight_decay": 0.004770259527907836
+ }
+ }
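
trainer_state.json records a hyper-parameter-search trial ("is_hyper_param_search": true) whose best value, 0.9819, equals eval_macro_f1 at epoch 15 (step 3180), matching the recorded best_model_checkpoint. A short, hedged sketch for summarising this log from the file in this commit (the local path is an assumption):

```python
# Sketch only: print the best checkpoint and the per-epoch eval_macro_f1 curve
# recorded in trainer_state.json.
import json

with open("trainer_state.json") as f:  # assumed local copy of the file in this commit
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
print("best metric    :", state["best_metric"])  # macro-F1 at epoch 15 / step 3180

for entry in state["log_history"]:
    if "eval_macro_f1" in entry:
        print(f"epoch {entry['epoch']:>5}: macro-F1 = {entry['eval_macro_f1']:.4f}")
```
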
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ab52a9c26c37fd87431c66d4e04ba39612e66e1e5418d2ba771a3825defbc8d
+ size 3579
vocab.txt ADDED
The diff for this file is too large to render. See raw diff