Commit f2a1bed
jdorairaj committed
1 Parent(s): 296cc80

completed la on sst2

Files changed (26)
  1. outputs/args.json +1 -1
  2. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log +676 -0
  3. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  4. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  5. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json +130 -0
  6. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  7. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  8. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json +130 -0
  9. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  10. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  11. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json +130 -0
  12. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json +1 -0
  13. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json +0 -0
  14. outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json +130 -0
  15. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/f_mu_kron_all_homo_1000.pt +3 -0
  16. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/f_var_kron_all_homo_1000.pt +3 -0
  17. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/prior_precision_kron_all_homo_1000.pt +3 -0
  18. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/f_mu_kron_all_homo_1000.pt +3 -0
  19. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/f_var_kron_all_homo_1000.pt +3 -0
  20. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/prior_precision_kron_all_homo_1000.pt +3 -0
  21. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/f_mu_kron_all_homo_1000.pt +3 -0
  22. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/f_var_kron_all_homo_1000.pt +3 -0
  23. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/prior_precision_kron_all_homo_1000.pt +3 -0
  24. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/f_mu_kron_all_homo_1000.pt +3 -0
  25. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/f_var_kron_all_homo_1000.pt +3 -0
  26. outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/prior_precision_kron_all_homo_1000.pt +3 -0
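
The `f_mu_*`, `f_var_*`, and `prior_precision_*` tensors listed under `outputs_laplace/` follow the artifact naming of the laplace-torch library (Kronecker-factored Hessian, "kron", over all weights, "all"), and the `*_mc_corr_1000.json` results suggest Monte Carlo predictions from 1000 correlated samples. A minimal sketch of how such artifacts are typically produced with laplace-torch; `model`, `train_loader`, and `eval_loader` are placeholders, and the "homo"/"1000" readings are inferred from the file names rather than from this repo's code:

# Hypothetical sketch, not this repository's script.
import torch
from laplace import Laplace

la = Laplace(model, "classification",      # placeholder fine-tuned classifier
             subset_of_weights="all",      # "all" in the file names
             hessian_structure="kron")     # "kron" in the file names
la.fit(train_loader)                       # curvature from the training data
la.optimize_prior_precision()              # tuned value, saved as prior_precision_*.pt

f_mu, f_var = [], []
for x, _ in eval_loader:                   # SST-2 validation set, 872 examples
    mu, var = la._glm_predictive_distribution(x)  # internal helper: latent Gaussian per input
    f_mu.append(mu)
    f_var.append(var)
f_mu, f_var = torch.cat(f_mu), torch.cat(f_var)   # [872, 2] and [872, 2, 2], as logged below

# "mc_corr_1000" reading (assumed): MC softmax over 1000 samples from the full covariance
dist = torch.distributions.MultivariateNormal(f_mu, covariance_matrix=f_var)
probs = torch.softmax(dist.sample((1000,)), dim=-1).mean(0)

torch.save(f_mu, "f_mu_kron_all_homo_1000.pt")
torch.save(f_var, "f_var_kron_all_homo_1000.pt")
torch.save(la.prior_precision, "prior_precision_kron_all_homo_1000.pt")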
outputs/args.json CHANGED
@@ -17,7 +17,7 @@
17
  "num_warmup_steps": 0,
18
  "output_dir": "./outputs",
19
  "peft_method": null,
20
- "seed": 42,
+ "seed": 12345,
21
  "push_to_hub": false,
22
  "hub_model_id": null,
23
  "hub_token": null,
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/logfile_la.log CHANGED
@@ -251,3 +251,679 @@
251
  06/02/2024 09:30:51 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
252
  06/02/2024 09:30:51 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
253
  06/02/2024 09:30:51 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
254
+ 06/02/2024 13:56:18 - INFO - __main__ - Number of labels detected = 2
255
+ 06/02/2024 13:56:19 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
256
+ 06/02/2024 13:56:20 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/adapter_config.json
257
+ 06/02/2024 13:56:20 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
258
+ 06/02/2024 13:56:20 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_adapter.bin
259
+ 06/02/2024 13:56:20 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/head_config.json
260
+ 06/02/2024 13:56:20 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
261
+ 06/02/2024 13:56:20 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/pytorch_model_head.bin
262
+ 06/02/2024 13:56:20 - INFO - __main__ - Adapter Name = sst2
263
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
264
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
265
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
266
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
267
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
268
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
269
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
270
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
271
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
272
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
273
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
274
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
275
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
276
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
277
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
278
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
279
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
280
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
281
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
282
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
283
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
284
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
285
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
286
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
287
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
288
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
289
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
290
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
291
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
292
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
293
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
294
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
295
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
296
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
297
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
298
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
299
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
300
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
301
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
302
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
303
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
304
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
305
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
306
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
307
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
308
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
309
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
310
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
311
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
312
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
313
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
314
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
315
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
316
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
317
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
318
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
319
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
320
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
321
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
322
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
323
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
324
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
325
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
326
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
327
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
328
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
329
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
330
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
331
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
332
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
333
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
334
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
335
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
336
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
337
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
338
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
339
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
340
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
341
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
342
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
343
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
344
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
345
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
346
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
347
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
348
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
349
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
350
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
351
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
352
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
353
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
354
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
355
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
356
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
357
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
358
+ 06/02/2024 13:56:20 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
359
+ 06/02/2024 13:56:20 - INFO - __main__ - heads.sst2.1.weight
360
+ 06/02/2024 13:56:20 - INFO - __main__ - heads.sst2.1.bias
361
+ 06/02/2024 13:56:20 - INFO - __main__ - heads.sst2.4.weight
362
+ 06/02/2024 13:56:20 - INFO - __main__ - heads.sst2.4.bias
363
+ 06/02/2024 13:56:34 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
364
+ 06/02/2024 13:56:34 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
365
+ 06/02/2024 13:56:34 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
366
+ 06/02/2024 14:27:41 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
367
+ 06/02/2024 14:27:41 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
368
+ 06/02/2024 14:27:41 - INFO - __main__ - tensor([[-4.2414, 4.5040],
369
+ [ 0.6184, -0.4093],
370
+ [-2.2437, 2.3636],
371
+ ...,
372
+ [ 0.3875, -0.2475],
373
+ [ 2.0167, -1.9000],
374
+ [-2.6153, 2.8984]], device='cuda:0')
375
+ 06/02/2024 14:27:41 - INFO - __main__ - tensor([[[3.0718, 2.7085],
376
+ [2.7085, 3.0434]],
377
+
378
+ [[1.7573, 0.4796],
379
+ [0.4796, 1.7348]],
380
+
381
+ [[1.9210, 1.0496],
382
+ [1.0496, 1.9672]],
383
+
384
+ ...,
385
+
386
+ [[1.4768, 0.1399],
387
+ [0.1399, 1.5445]],
388
+
389
+ [[2.4637, 0.4742],
390
+ [0.4742, 2.3755]],
391
+
392
+ [[2.6679, 1.4382],
393
+ [1.4382, 2.7226]]], device='cuda:0')
394
+ 06/02/2024 14:27:41 - INFO - __main__ - ***** Completed training *****
395
+ 06/02/2024 14:27:44 - INFO - __main__ - Number of labels detected = 2
396
+ 06/02/2024 14:27:45 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
397
+ 06/02/2024 14:27:46 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/adapter_config.json
398
+ 06/02/2024 14:27:46 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
399
+ 06/02/2024 14:27:46 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_adapter.bin
400
+ 06/02/2024 14:27:46 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/head_config.json
401
+ 06/02/2024 14:27:46 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
402
+ 06/02/2024 14:27:46 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/pytorch_model_head.bin
403
+ 06/02/2024 14:27:46 - INFO - __main__ - Adapter Name = sst2
404
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
405
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
406
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
407
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
408
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
409
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
410
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
411
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
412
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
413
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
414
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
415
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
416
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
417
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
418
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
419
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
420
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
421
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
422
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
423
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
424
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
425
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
426
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
427
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
428
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
429
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
430
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
431
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
432
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
433
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
434
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
435
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
436
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
437
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
438
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
439
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
440
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
441
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
442
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
443
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
444
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
445
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
446
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
447
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
448
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
449
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
450
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
451
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
452
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
453
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
454
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
455
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
456
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
457
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
458
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
459
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
460
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
461
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
462
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
463
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
464
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
465
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
466
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
467
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
468
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
469
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
470
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
471
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
472
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
473
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
474
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
475
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
476
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
477
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
478
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
479
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
480
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
481
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
482
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
483
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
484
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
485
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
486
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
487
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
488
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
489
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
490
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
491
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
492
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
493
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
494
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
495
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
496
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
497
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
498
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
499
+ 06/02/2024 14:27:46 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
500
+ 06/02/2024 14:27:46 - INFO - __main__ - heads.sst2.1.weight
501
+ 06/02/2024 14:27:46 - INFO - __main__ - heads.sst2.1.bias
502
+ 06/02/2024 14:27:46 - INFO - __main__ - heads.sst2.4.weight
503
+ 06/02/2024 14:27:46 - INFO - __main__ - heads.sst2.4.bias
504
+ 06/02/2024 14:28:01 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
505
+ 06/02/2024 14:28:01 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
506
+ 06/02/2024 14:28:01 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
507
+ 06/02/2024 15:00:21 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
508
+ 06/02/2024 15:00:21 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
509
+ 06/02/2024 15:00:21 - INFO - __main__ - tensor([[-3.6767, 3.8599],
510
+ [ 1.2684, -1.2134],
511
+ [-1.9248, 1.9909],
512
+ ...,
513
+ [ 1.6397, -1.6343],
514
+ [ 2.6456, -2.6197],
515
+ [-2.3511, 2.5768]], device='cuda:0')
516
+ 06/02/2024 15:00:21 - INFO - __main__ - tensor([[[3.1317, 2.8936],
517
+ [2.8936, 3.0991]],
518
+
519
+ [[1.9052, 0.5840],
520
+ [0.5840, 1.8519]],
521
+
522
+ [[1.8741, 1.3538],
523
+ [1.3538, 1.9133]],
524
+
525
+ ...,
526
+
527
+ [[1.9867, 0.4452],
528
+ [0.4452, 1.9560]],
529
+
530
+ [[3.1267, 1.0338],
531
+ [1.0338, 2.9659]],
532
+
533
+ [[2.6059, 1.9332],
534
+ [1.9332, 2.6584]]], device='cuda:0')
535
+ 06/02/2024 15:00:21 - INFO - __main__ - ***** Completed training *****
536
+ 06/02/2024 15:00:23 - INFO - __main__ - Number of labels detected = 2
537
+ 06/02/2024 15:00:24 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
538
+ 06/02/2024 15:00:24 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/adapter_config.json
539
+ 06/02/2024 15:00:24 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
540
+ 06/02/2024 15:00:24 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_adapter.bin
541
+ 06/02/2024 15:00:24 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/head_config.json
542
+ 06/02/2024 15:00:24 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
543
+ 06/02/2024 15:00:25 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/pytorch_model_head.bin
544
+ 06/02/2024 15:00:25 - INFO - __main__ - Adapter Name = sst2
545
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
546
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
547
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
548
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
549
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
550
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
551
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
552
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
553
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
554
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
555
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
556
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
557
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
558
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
559
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
560
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
561
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
562
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
563
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
564
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
565
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
566
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
567
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
568
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
569
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
570
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
571
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
572
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
573
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
574
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
575
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
576
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
577
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
578
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
579
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
580
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
581
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
582
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
583
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
584
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
585
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
586
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
587
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
588
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
589
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
590
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
591
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
592
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
593
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
594
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
595
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
596
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
597
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
598
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
599
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
600
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
601
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
602
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
603
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
604
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
605
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
606
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
607
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
608
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
609
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
610
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
611
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
612
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
613
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
614
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
615
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
616
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
617
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
618
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
619
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
620
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
621
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
622
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
623
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
624
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
625
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
626
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
627
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
628
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
629
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
630
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
631
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
632
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
633
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
634
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
635
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
636
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
637
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
638
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
639
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
640
+ 06/02/2024 15:00:25 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
641
+ 06/02/2024 15:00:25 - INFO - __main__ - heads.sst2.1.weight
642
+ 06/02/2024 15:00:25 - INFO - __main__ - heads.sst2.1.bias
643
+ 06/02/2024 15:00:25 - INFO - __main__ - heads.sst2.4.weight
644
+ 06/02/2024 15:00:25 - INFO - __main__ - heads.sst2.4.bias
645
+ 06/02/2024 15:00:38 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
646
+ 06/02/2024 15:00:38 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
647
+ 06/02/2024 15:00:38 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
648
+ 06/02/2024 15:33:12 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
649
+ 06/02/2024 15:33:12 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
650
+ 06/02/2024 15:33:12 - INFO - __main__ - tensor([[-4.2006, 4.1726],
651
+ [ 1.0005, -0.9923],
652
+ [-2.3267, 2.3123],
653
+ ...,
654
+ [ 1.8610, -1.8978],
655
+ [ 2.0294, -2.0685],
656
+ [-2.7907, 2.8982]], device='cuda:0')
657
+ 06/02/2024 15:33:12 - INFO - __main__ - tensor([[[3.3278, 3.0577],
658
+ [3.0577, 3.2993]],
659
+
660
+ [[1.6975, 0.8979],
661
+ [0.8979, 1.6665]],
662
+
663
+ [[2.2034, 1.2772],
664
+ [1.2772, 2.2290]],
665
+
666
+ ...,
667
+
668
+ [[2.1030, 1.2174],
669
+ [1.2174, 2.0878]],
670
+
671
+ [[2.4436, 1.5252],
672
+ [1.5252, 2.3909]],
673
+
674
+ [[3.0498, 1.5716],
675
+ [1.5716, 3.0871]]], device='cuda:0')
676
+ 06/02/2024 15:33:12 - INFO - __main__ - ***** Completed training *****
677
+ 06/02/2024 15:33:14 - INFO - __main__ - Number of labels detected = 2
678
+ 06/02/2024 15:33:15 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
679
+ 06/02/2024 15:33:16 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/adapter_config.json
680
+ 06/02/2024 15:33:16 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
681
+ 06/02/2024 15:33:16 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_adapter.bin
682
+ 06/02/2024 15:33:16 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/head_config.json
683
+ 06/02/2024 15:33:16 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
684
+ 06/02/2024 15:33:16 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/pytorch_model_head.bin
685
+ 06/02/2024 15:33:16 - INFO - __main__ - Adapter Name = sst2
686
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
687
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
688
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
689
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
690
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
691
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
692
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
693
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
694
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
695
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
696
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
697
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
698
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
699
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
700
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
701
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
702
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
703
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
704
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
705
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
706
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
707
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
708
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
709
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 15:33:16 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 15:33:16 - INFO - __main__ - heads.sst2.4.bias
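For context, the long listing above is just the names of the parameters left trainable after activating the `sst2` adapter (the adapter down/up projections in every layer plus the classification head). A minimal sketch of how such a listing is typically produced, assuming a standard `named_parameters()` walk rather than the repo's actual code:

```python
import logging

logger = logging.getLogger(__name__)

def log_trainable_parameters(model) -> None:
    """Log the name of every parameter that still requires gradients."""
    for name, param in model.named_parameters():
        if param.requires_grad:  # only adapter and head weights remain trainable
            logger.info(name)
```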
+ 06/02/2024 15:33:30 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 15:33:30 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 15:33:30 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 16:06:18 - INFO - __main__ - f_mu shape : torch.Size([872, 2])
+ 06/02/2024 16:06:18 - INFO - __main__ - f_var shape : torch.Size([872, 2, 2])
+ 06/02/2024 16:06:18 - INFO - __main__ - tensor([[-4.6010, 4.5464],
+ [ 0.8965, -0.9374],
+ [-2.6567, 2.6105],
+ ...,
+ [ 1.9415, -2.0003],
+ [ 2.2464, -2.3064],
+ [-2.7984, 2.8625]], device='cuda:0')
+ 06/02/2024 16:06:18 - INFO - __main__ - tensor([[[3.5315, 3.3129],
+ [3.3129, 3.4958]],
+
+ [[1.6473, 0.6023],
+ [0.6023, 1.6201]],
+
+ [[2.4584, 1.3256],
+ [1.3256, 2.4776]],
+
+ ...,
+
+ [[2.0444, 1.0625],
+ [1.0625, 2.0286]],
+
+ [[2.5821, 1.2764],
+ [1.2764, 2.5041]],
+
+ [[3.6194, 0.7417],
+ [0.7417, 3.6767]]], device='cuda:0')
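The `f_mu`/`f_var` tensors logged above are the Laplace predictive mean (872 × 2 logits) and the per-sentence 2 × 2 logit covariance over the SST-2 validation set. Judging by the `mc_corr_1000` suffix on the result files, predictive probabilities are presumably obtained by drawing 1000 correlated Monte Carlo samples from this Gaussian; a minimal sketch under that assumption, not the repo's actual code:

```python
import torch

def mc_predictive(f_mu: torch.Tensor, f_var: torch.Tensor,
                  n_samples: int = 1000) -> torch.Tensor:
    """Monte Carlo predictive: sample correlated logits from N(f_mu, f_var)
    and average the softmax over samples.

    f_mu:  (N, C) mean logits; f_var: (N, C, C) per-example logit covariance.
    """
    dist = torch.distributions.MultivariateNormal(f_mu, covariance_matrix=f_var)
    logits = dist.sample(torch.Size([n_samples]))     # (n_samples, N, C)
    return torch.softmax(logits, dim=-1).mean(dim=0)  # (N, C)
```

In practice `f_var` may need a small diagonal jitter so that `MultivariateNormal` accepts it as positive definite.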
+ 06/02/2024 16:06:18 - INFO - __main__ - ***** Completed training *****
+ 06/02/2024 16:06:20 - INFO - __main__ - Number of labels detected = 2
+ 06/02/2024 16:06:21 - INFO - adapters.heads.model_mixin - Adding head 'default' with config {'head_type': 'masked_lm', 'vocab_size': 50265, 'embedding_size': 768, 'layers': 2, 'activation_function': 'gelu', 'layer_norm': True, 'bias': True, 'shift_labels': False, 'label2id': None}.
+ 06/02/2024 16:06:22 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/adapter_config.json
+ 06/02/2024 16:06:22 - INFO - adapters.configuration.model_adapters_config - Adding adapter 'sst2'.
+ 06/02/2024 16:06:22 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_adapter.bin
+ 06/02/2024 16:06:22 - INFO - adapters.loading - Loading module configuration from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/head_config.json
+ 06/02/2024 16:06:22 - INFO - adapters.heads.model_mixin - Adding head 'sst2' with config {'head_type': 'classification', 'num_labels': 2, 'layers': 2, 'activation_function': 'tanh', 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'use_pooler': False, 'bias': True, 'dropout_prob': None}.
+ 06/02/2024 16:06:22 - INFO - adapters.loading - Loading module weights from ./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999/pytorch_model_head.bin
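The loading lines above show the `adapters` library restoring the step-9999 checkpoint: adapter config and weights first, then the classification head. A minimal sketch of the same sequence (path taken from the log; surrounding setup assumed, not the repo's exact code):

```python
from adapters import AutoAdapterModel

model = AutoAdapterModel.from_pretrained("roberta-base")
# load_adapter reads adapter_config.json / pytorch_adapter.bin and, when a
# saved head is present in the directory, head_config.json / pytorch_model_head.bin
adapter_name = model.load_adapter(
    "./outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_9999"
)
model.set_active_adapters(adapter_name)
```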
+ 06/02/2024 16:06:22 - INFO - __main__ - Adapter Name = sst2
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.0.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.1.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.2.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.3.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.4.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.5.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.6.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.7.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.8.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.9.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.10.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.attention.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_down.0.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - roberta.encoder.layer.11.output.adapters.sst2.adapter_up.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - heads.sst2.1.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - heads.sst2.1.bias
+ 06/02/2024 16:06:22 - INFO - __main__ - heads.sst2.4.weight
+ 06/02/2024 16:06:22 - INFO - __main__ - heads.sst2.4.bias
+ 06/02/2024 16:06:36 - INFO - __main__ - Sample 27303 of the training set: {'input_ids': [0, 10273, 5971, 2650, 2156, 114, 3694, 2156, 29166, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], 'labels': 1}.
+ 06/02/2024 16:06:36 - INFO - __main__ - Sample 48017 of the training set: {'input_ids': [0, 2629, 2526, 9288, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1], 'labels': 0}.
+ 06/02/2024 16:06:36 - INFO - __main__ - Sample 666 of the training set: {'input_ids': [0, 26692, 45, 173, 479, 1437, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1], 'labels': 0}.
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9197247706422018}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_1999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1038383104,
+ "max_memory_allocated": 5068723200,
+ "memory_reserved": 5362417664,
+ "max_memory_reserved": 15634268160,
+ "memory_stats": {
+ "active.all.allocated": 14684790,
+ "active.all.current": 1189,
+ "active.all.freed": 14683601,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 2781857,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 2781573,
+ "active.large_pool.peak": 457,
+ "active.small_pool.allocated": 11902933,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 11902028,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 12184115120640,
+ "active_bytes.all.current": 1038383104,
+ "active_bytes.all.freed": 12183076737536,
+ "active_bytes.all.peak": 5068723200,
+ "active_bytes.large_pool.allocated": 7534776679424,
+ "active_bytes.large_pool.current": 1021138944,
+ "active_bytes.large_pool.freed": 7533755540480,
+ "active_bytes.large_pool.peak": 5044402176,
+ "active_bytes.small_pool.allocated": 4649338441216,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 4649321197056,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 12184115120640,
+ "allocated_bytes.all.current": 1038383104,
+ "allocated_bytes.all.freed": 12183076737536,
+ "allocated_bytes.all.peak": 5068723200,
+ "allocated_bytes.large_pool.allocated": 7534776679424,
+ "allocated_bytes.large_pool.current": 1021138944,
+ "allocated_bytes.large_pool.freed": 7533755540480,
+ "allocated_bytes.large_pool.peak": 5044402176,
+ "allocated_bytes.small_pool.allocated": 4649338441216,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 4649321197056,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 14684790,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 14683601,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 2781857,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 2781573,
+ "allocation.large_pool.peak": 457,
+ "allocation.small_pool.allocated": 11902933,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 11902028,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 7296524,
+ "inactive_split.all.current": 161,
+ "inactive_split.all.freed": 7296363,
+ "inactive_split.all.peak": 232,
+ "inactive_split.large_pool.allocated": 1397712,
+ "inactive_split.large_pool.current": 42,
+ "inactive_split.large_pool.freed": 1397670,
+ "inactive_split.large_pool.peak": 98,
+ "inactive_split.small_pool.allocated": 5898812,
+ "inactive_split.small_pool.current": 119,
+ "inactive_split.small_pool.freed": 5898693,
+ "inactive_split.small_pool.peak": 146,
+ "inactive_split_bytes.all.allocated": 14518358885888,
+ "inactive_split_bytes.all.current": 161187840,
+ "inactive_split_bytes.all.freed": 14518197698048,
+ "inactive_split_bytes.all.peak": 1916622848,
+ "inactive_split_bytes.large_pool.allocated": 9706063452672,
+ "inactive_split_bytes.large_pool.current": 98740224,
+ "inactive_split_bytes.large_pool.freed": 9705964712448,
+ "inactive_split_bytes.large_pool.peak": 1881359360,
+ "inactive_split_bytes.small_pool.allocated": 4812295433216,
+ "inactive_split_bytes.small_pool.current": 62447616,
+ "inactive_split_bytes.small_pool.freed": 4812232985600,
+ "inactive_split_bytes.small_pool.peak": 103962112,
+ "max_split_size": -1,
+ "num_alloc_retries": 1,
+ "num_device_alloc": 465,
+ "num_device_free": 296,
+ "num_ooms": 0,
+ "num_sync_all_streams": 2,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 11927076483933,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 11926048709121,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 7278340736064,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 7277330065976,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 4648735747869,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 4648718643145,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 17504927744,
+ "reserved_bytes.all.current": 5362417664,
+ "reserved_bytes.all.freed": 12142510080,
+ "reserved_bytes.all.peak": 15634268160,
+ "reserved_bytes.large_pool.allocated": 17213423616,
+ "reserved_bytes.large_pool.current": 5224005632,
+ "reserved_bytes.large_pool.freed": 11989417984,
+ "reserved_bytes.large_pool.peak": 15489564672,
+ "reserved_bytes.small_pool.allocated": 291504128,
+ "reserved_bytes.small_pool.current": 138412032,
+ "reserved_bytes.small_pool.freed": 153092096,
+ "reserved_bytes.small_pool.peak": 144703488,
+ "segment.all.allocated": 465,
+ "segment.all.current": 169,
+ "segment.all.freed": 296,
+ "segment.all.peak": 365,
+ "segment.large_pool.allocated": 326,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 223,
+ "segment.large_pool.peak": 296,
+ "segment.small_pool.allocated": 139,
+ "segment.small_pool.current": 66,
+ "segment.small_pool.freed": 73,
+ "segment.small_pool.peak": 69
+ }
+ }
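The counters in `gpu_stats_la.json` mirror PyTorch's CUDA caching-allocator introspection (`torch.cuda.memory_stats()` and friends). A minimal sketch, assuming the file is produced roughly like this rather than by the repo's exact code:

```python
import json
import torch

def dump_gpu_stats(path: str, device: int = 0) -> None:
    """Snapshot the CUDA allocator counters into a JSON file."""
    stats = {
        "memory_allocated": torch.cuda.memory_allocated(device),
        "max_memory_allocated": torch.cuda.max_memory_allocated(device),
        "memory_reserved": torch.cuda.memory_reserved(device),
        "max_memory_reserved": torch.cuda.max_memory_reserved(device),
        "memory_stats": dict(torch.cuda.memory_stats(device)),
    }
    with open(path, "w") as f:
        json.dump(stats, f, indent=4)
```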
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.930045871559633}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_3999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1039306752,
+ "max_memory_allocated": 5068723200,
+ "memory_reserved": 5366611968,
+ "max_memory_reserved": 15634268160,
+ "memory_stats": {
+ "active.all.allocated": 29369628,
+ "active.all.current": 1189,
+ "active.all.freed": 29368439,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 5563712,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 5563428,
+ "active.large_pool.peak": 457,
+ "active.small_pool.allocated": 23805916,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 23805011,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 24356802661888,
+ "active_bytes.all.current": 1039306752,
+ "active_bytes.all.freed": 24355763355136,
+ "active_bytes.all.peak": 5068723200,
+ "active_bytes.large_pool.allocated": 15058125752832,
+ "active_bytes.large_pool.current": 1022062592,
+ "active_bytes.large_pool.freed": 15057103690240,
+ "active_bytes.large_pool.peak": 5044402176,
+ "active_bytes.small_pool.allocated": 9298676909056,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 9298659664896,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 24356802661888,
+ "allocated_bytes.all.current": 1039306752,
+ "allocated_bytes.all.freed": 24355763355136,
+ "allocated_bytes.all.peak": 5068723200,
+ "allocated_bytes.large_pool.allocated": 15058125752832,
+ "allocated_bytes.large_pool.current": 1022062592,
+ "allocated_bytes.large_pool.freed": 15057103690240,
+ "allocated_bytes.large_pool.peak": 5044402176,
+ "allocated_bytes.small_pool.allocated": 9298676909056,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 9298659664896,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 29369628,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 29368439,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 5563712,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 5563428,
+ "allocation.large_pool.peak": 457,
+ "allocation.small_pool.allocated": 23805916,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 23805011,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 14557259,
+ "inactive_split.all.current": 145,
+ "inactive_split.all.freed": 14557114,
+ "inactive_split.all.peak": 232,
+ "inactive_split.large_pool.allocated": 2788999,
+ "inactive_split.large_pool.current": 41,
+ "inactive_split.large_pool.freed": 2788958,
+ "inactive_split.large_pool.peak": 102,
+ "inactive_split.small_pool.allocated": 11768260,
+ "inactive_split.small_pool.current": 104,
+ "inactive_split.small_pool.freed": 11768156,
+ "inactive_split.small_pool.peak": 172,
+ "inactive_split_bytes.all.allocated": 29014462114304,
+ "inactive_split_bytes.all.current": 153972736,
+ "inactive_split_bytes.all.freed": 29014308141568,
+ "inactive_split_bytes.all.peak": 1916622848,
+ "inactive_split_bytes.large_pool.allocated": 19380757452800,
+ "inactive_split_bytes.large_pool.current": 97816576,
+ "inactive_split_bytes.large_pool.freed": 19380659636224,
+ "inactive_split_bytes.large_pool.peak": 1881359360,
+ "inactive_split_bytes.small_pool.allocated": 9633704661504,
+ "inactive_split_bytes.small_pool.current": 56156160,
+ "inactive_split_bytes.small_pool.freed": 9633648505344,
+ "inactive_split_bytes.small_pool.peak": 103962112,
+ "max_split_size": -1,
+ "num_alloc_retries": 2,
+ "num_device_alloc": 868,
+ "num_device_free": 696,
+ "num_ooms": 0,
+ "num_sync_all_streams": 5,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 23854135931542,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 23853108156730,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 14556664432768,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 14555653762680,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 9297471498774,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 9297454394050,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 34177286144,
+ "reserved_bytes.all.current": 5366611968,
+ "reserved_bytes.all.freed": 28810674176,
+ "reserved_bytes.all.peak": 15634268160,
+ "reserved_bytes.large_pool.allocated": 33661386752,
+ "reserved_bytes.large_pool.current": 5221908480,
+ "reserved_bytes.large_pool.freed": 28439478272,
+ "reserved_bytes.large_pool.peak": 15489564672,
+ "reserved_bytes.small_pool.allocated": 515899392,
+ "reserved_bytes.small_pool.current": 144703488,
+ "reserved_bytes.small_pool.freed": 371195904,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 868,
+ "segment.all.current": 172,
+ "segment.all.freed": 696,
+ "segment.all.peak": 366,
+ "segment.large_pool.allocated": 622,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 519,
+ "segment.large_pool.peak": 296,
+ "segment.small_pool.allocated": 246,
+ "segment.small_pool.current": 69,
+ "segment.small_pool.freed": 177,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9357798165137615}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_5999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1041354752,
+ "max_memory_allocated": 5068723200,
+ "memory_reserved": 5360320512,
+ "max_memory_reserved": 15634268160,
+ "memory_stats": {
+ "active.all.allocated": 44054516,
+ "active.all.current": 1189,
+ "active.all.freed": 44053327,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 8345567,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 8345283,
+ "active.large_pool.peak": 482,
+ "active.small_pool.allocated": 35708949,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 35708044,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 36540099486208,
+ "active_bytes.all.current": 1041354752,
+ "active_bytes.all.freed": 36539058131456,
+ "active_bytes.all.peak": 5068723200,
+ "active_bytes.large_pool.allocated": 22592084082688,
+ "active_bytes.large_pool.current": 1024110592,
+ "active_bytes.large_pool.freed": 22591059972096,
+ "active_bytes.large_pool.peak": 5044402176,
+ "active_bytes.small_pool.allocated": 13948015403520,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 13947998159360,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 36540099486208,
+ "allocated_bytes.all.current": 1041354752,
+ "allocated_bytes.all.freed": 36539058131456,
+ "allocated_bytes.all.peak": 5068723200,
+ "allocated_bytes.large_pool.allocated": 22592084082688,
+ "allocated_bytes.large_pool.current": 1024110592,
+ "allocated_bytes.large_pool.freed": 22591059972096,
+ "allocated_bytes.large_pool.peak": 5044402176,
+ "allocated_bytes.small_pool.allocated": 13948015403520,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 13947998159360,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 44054516,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 44053327,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 8345567,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 8345283,
+ "allocation.large_pool.peak": 482,
+ "allocation.small_pool.allocated": 35708949,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 35708044,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 21846975,
+ "inactive_split.all.current": 156,
+ "inactive_split.all.freed": 21846819,
+ "inactive_split.all.peak": 232,
+ "inactive_split.large_pool.allocated": 4200236,
+ "inactive_split.large_pool.current": 41,
+ "inactive_split.large_pool.freed": 4200195,
+ "inactive_split.large_pool.peak": 102,
+ "inactive_split.small_pool.allocated": 17646739,
+ "inactive_split.small_pool.current": 115,
+ "inactive_split.small_pool.freed": 17646624,
+ "inactive_split.small_pool.peak": 172,
+ "inactive_split_bytes.all.allocated": 43039525160448,
+ "inactive_split_bytes.all.current": 162410496,
+ "inactive_split_bytes.all.freed": 43039362749952,
+ "inactive_split_bytes.all.peak": 1916622848,
+ "inactive_split_bytes.large_pool.allocated": 28685771414528,
+ "inactive_split_bytes.large_pool.current": 95768576,
+ "inactive_split_bytes.large_pool.freed": 28685675645952,
+ "inactive_split_bytes.large_pool.peak": 1881359360,
+ "inactive_split_bytes.small_pool.allocated": 14353753745920,
+ "inactive_split_bytes.small_pool.current": 66641920,
+ "inactive_split_bytes.small_pool.freed": 14353687104000,
+ "inactive_split_bytes.small_pool.peak": 105467904,
+ "max_split_size": -1,
+ "num_alloc_retries": 3,
+ "num_device_alloc": 1296,
+ "num_device_free": 1128,
+ "num_ooms": 0,
+ "num_sync_all_streams": 8,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 35781195382187,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 35780167607375,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 21834988129472,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 21833977459384,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 13946207252715,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 13946190147991,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 51510247424,
+ "reserved_bytes.all.current": 5360320512,
+ "reserved_bytes.all.freed": 46149926912,
+ "reserved_bytes.all.peak": 15634268160,
+ "reserved_bytes.large_pool.allocated": 50769952768,
+ "reserved_bytes.large_pool.current": 5224005632,
+ "reserved_bytes.large_pool.freed": 45545947136,
+ "reserved_bytes.large_pool.peak": 15489564672,
+ "reserved_bytes.small_pool.allocated": 740294656,
+ "reserved_bytes.small_pool.current": 136314880,
+ "reserved_bytes.small_pool.freed": 603979776,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 1296,
+ "segment.all.current": 168,
+ "segment.all.freed": 1128,
+ "segment.all.peak": 366,
+ "segment.large_pool.allocated": 943,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 840,
+ "segment.large_pool.peak": 296,
+ "segment.small_pool.allocated": 353,
+ "segment.small_pool.current": 65,
+ "segment.small_pool.freed": 288,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/all_results_la_kron_all_homo_mc_corr_1000.json ADDED
@@ -0,0 +1 @@
+ {"eval_accuracy": 0.9334862385321101}
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/eval_res_la_kron_all_homo_mc_corr_1000.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/sst2/roberta-base_adapterstrain_val_0.0001_12345_8_10000/step_7999/gpu_stats_la.json ADDED
@@ -0,0 +1,130 @@
+ {
+ "memory_allocated": 1039912960,
+ "max_memory_allocated": 5068723200,
+ "memory_reserved": 5358223360,
+ "max_memory_reserved": 15634268160,
+ "memory_stats": {
+ "active.all.allocated": 58739454,
+ "active.all.current": 1189,
+ "active.all.freed": 58738265,
+ "active.all.peak": 1441,
+ "active.large_pool.allocated": 11127422,
+ "active.large_pool.current": 284,
+ "active.large_pool.freed": 11127138,
+ "active.large_pool.peak": 482,
+ "active.small_pool.allocated": 47612032,
+ "active.small_pool.current": 905,
+ "active.small_pool.freed": 47611127,
+ "active.small_pool.peak": 1107,
+ "active_bytes.all.allocated": 48723611251200,
+ "active_bytes.all.current": 1039912960,
+ "active_bytes.all.freed": 48722571338240,
+ "active_bytes.all.peak": 5068723200,
+ "active_bytes.large_pool.allocated": 30126257326592,
+ "active_bytes.large_pool.current": 1022668800,
+ "active_bytes.large_pool.freed": 30125234657792,
+ "active_bytes.large_pool.peak": 5044402176,
+ "active_bytes.small_pool.allocated": 18597353924608,
+ "active_bytes.small_pool.current": 17244160,
+ "active_bytes.small_pool.freed": 18597336680448,
+ "active_bytes.small_pool.peak": 137228800,
+ "allocated_bytes.all.allocated": 48723611251200,
+ "allocated_bytes.all.current": 1039912960,
+ "allocated_bytes.all.freed": 48722571338240,
+ "allocated_bytes.all.peak": 5068723200,
+ "allocated_bytes.large_pool.allocated": 30126257326592,
+ "allocated_bytes.large_pool.current": 1022668800,
+ "allocated_bytes.large_pool.freed": 30125234657792,
+ "allocated_bytes.large_pool.peak": 5044402176,
+ "allocated_bytes.small_pool.allocated": 18597353924608,
+ "allocated_bytes.small_pool.current": 17244160,
+ "allocated_bytes.small_pool.freed": 18597336680448,
+ "allocated_bytes.small_pool.peak": 137228800,
+ "allocation.all.allocated": 58739454,
+ "allocation.all.current": 1189,
+ "allocation.all.freed": 58738265,
+ "allocation.all.peak": 1441,
+ "allocation.large_pool.allocated": 11127422,
+ "allocation.large_pool.current": 284,
+ "allocation.large_pool.freed": 11127138,
+ "allocation.large_pool.peak": 482,
+ "allocation.small_pool.allocated": 47612032,
+ "allocation.small_pool.current": 905,
+ "allocation.small_pool.freed": 47611127,
+ "allocation.small_pool.peak": 1107,
+ "inactive_split.all.allocated": 29030003,
+ "inactive_split.all.current": 152,
+ "inactive_split.all.freed": 29029851,
+ "inactive_split.all.peak": 233,
+ "inactive_split.large_pool.allocated": 5594529,
+ "inactive_split.large_pool.current": 44,
+ "inactive_split.large_pool.freed": 5594485,
+ "inactive_split.large_pool.peak": 102,
+ "inactive_split.small_pool.allocated": 23435474,
+ "inactive_split.small_pool.current": 108,
+ "inactive_split.small_pool.freed": 23435366,
+ "inactive_split.small_pool.peak": 189,
+ "inactive_split_bytes.all.allocated": 56363395568640,
+ "inactive_split_bytes.all.current": 165949440,
+ "inactive_split_bytes.all.freed": 56363229619200,
+ "inactive_split_bytes.all.peak": 1916622848,
+ "inactive_split_bytes.large_pool.allocated": 37278295778304,
+ "inactive_split_bytes.large_pool.current": 97210368,
+ "inactive_split_bytes.large_pool.freed": 37278198567936,
+ "inactive_split_bytes.large_pool.peak": 1881359360,
+ "inactive_split_bytes.small_pool.allocated": 19085099790336,
+ "inactive_split_bytes.small_pool.current": 68739072,
+ "inactive_split_bytes.small_pool.freed": 19085031051264,
+ "inactive_split_bytes.small_pool.peak": 106288128,
+ "max_split_size": -1,
+ "num_alloc_retries": 4,
+ "num_device_alloc": 1722,
+ "num_device_free": 1554,
+ "num_ooms": 0,
+ "num_sync_all_streams": 11,
+ "oversize_allocations.allocated": 0,
+ "oversize_allocations.current": 0,
+ "oversize_allocations.freed": 0,
+ "oversize_allocations.peak": 0,
+ "oversize_segments.allocated": 0,
+ "oversize_segments.current": 0,
+ "oversize_segments.freed": 0,
+ "oversize_segments.peak": 0,
+ "requested_bytes.all.allocated": 47708254835868,
+ "requested_bytes.all.current": 1027774812,
+ "requested_bytes.all.freed": 47707227061056,
+ "requested_bytes.all.peak": 5016531548,
+ "requested_bytes.large_pool.allocated": 29113311826176,
+ "requested_bytes.large_pool.current": 1010670088,
+ "requested_bytes.large_pool.freed": 29112301156088,
+ "requested_bytes.large_pool.peak": 4992349256,
+ "requested_bytes.small_pool.allocated": 18594943009692,
+ "requested_bytes.small_pool.current": 17104724,
+ "requested_bytes.small_pool.freed": 18594925904968,
+ "requested_bytes.small_pool.peak": 137155084,
+ "reserved_bytes.all.allocated": 68836917248,
+ "reserved_bytes.all.current": 5358223360,
+ "reserved_bytes.all.freed": 63478693888,
+ "reserved_bytes.all.peak": 15634268160,
+ "reserved_bytes.large_pool.allocated": 67876421632,
+ "reserved_bytes.large_pool.current": 5221908480,
+ "reserved_bytes.large_pool.freed": 62654513152,
+ "reserved_bytes.large_pool.peak": 15489564672,
+ "reserved_bytes.small_pool.allocated": 960495616,
+ "reserved_bytes.small_pool.current": 136314880,
+ "reserved_bytes.small_pool.freed": 824180736,
+ "reserved_bytes.small_pool.peak": 146800640,
+ "segment.all.allocated": 1722,
+ "segment.all.current": 168,
+ "segment.all.freed": 1554,
+ "segment.all.peak": 366,
+ "segment.large_pool.allocated": 1264,
+ "segment.large_pool.current": 103,
+ "segment.large_pool.freed": 1161,
+ "segment.large_pool.peak": 296,
+ "segment.small_pool.allocated": 458,
+ "segment.small_pool.current": 65,
+ "segment.small_pool.freed": 393,
+ "segment.small_pool.peak": 70
+ }
+ }
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ce635d68b7798b040f4ec0edc59e446d088dd8f31e35c11b71bc9cec49bc10a4
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b4fe74f7ce87f6f0122d9f18263e74ec5497986ac4daa8f1bda00bfbf6c2ff3
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5feb68f3fc35ebf295efd00e93d39cc734af4f3796e7038248b7b9e101cbced
+ size 1379
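The `.pt` entries in this commit are Git LFS pointers: the repository stores only the `oid`/`size`, and `git lfs pull` fetches the actual tensors. Once fetched, they load directly with `torch.load`; shapes follow the log above (a sketch, with paths taken from this commit):

```python
import torch

base = "outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_1999"
f_mu = torch.load(f"{base}/f_mu_kron_all_homo_1000.pt")    # (872, 2) mean logits
f_var = torch.load(f"{base}/f_var_kron_all_homo_1000.pt")  # (872, 2, 2) covariances
prior_precision = torch.load(f"{base}/prior_precision_kron_all_homo_1000.pt")
```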
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92fc970e7bc3095e867ba4e3d34c08c2d36afed3b99d5086b216734b2b1dbee3
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9de8d60d7b7f875bfdcbc414c5d7a081a0ae6986ff8b6710d15146d3415aad84
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_3999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f53598b82dda52a645ea3bdf4e331516dd2f26dd9c9a6a0b88d241e21972d077
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1b541322d8ee917ead84e1368c1a5a8140e7bd8712a3f26a174a1c44b015d654
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:600295970e26039d7e3db6ff2090c0d3ac2480ea25cbabef47e87969d99e5f22
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_5999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f48cb09e594e968710304e5e0b4ddb1d5ac1bff4b876d32e6b68886f4d94b0f
+ size 1379
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/f_mu_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c6b5ae4b07d2c86e7ecced4e97280ce2f596d562f7445ed9fd82210dc3d18ea
+ size 8300
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/f_var_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b29d305bd92f091984e11cd25e0cb93242e68d918f68f36a9ac2a704572d95cf
+ size 15281
outputs_laplace/sst2/roberta-base_adapterstrain_val_0.0001_12345/step_7999/prior_precision_kron_all_homo_1000.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3daad801a0445717c8bcbc60c7377ad910fe65eea7694bf5cd53de5808d3e686
+ size 1379