ZZ99 committed on
Commit
5e7332d
1 Parent(s): a33bb7f

eval 0.7235

Browse files
README.md CHANGED
@@ -13,10 +13,10 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # test-mlm
15
 
16
- This model is a fine-tuned version of [ZZ99/deberta-v3-large-tapt](https://huggingface.co/ZZ99/deberta-v3-large-tapt) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.3692
19
- - Accuracy: 0.7180
20
 
21
  ## Model description
22
 
@@ -35,7 +35,7 @@ More information needed
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
- - learning_rate: 1e-05
39
  - train_batch_size: 4
40
  - eval_batch_size: 8
41
  - seed: 42
 
13
 
14
  # test-mlm
15
 
16
+ This model is a fine-tuned version of a local `deberta-v3-large-tapt` checkpoint (`/root/autodl-tmp/nbme/tmp/test-mlm/deberta-v3-large-tapt`) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.3436
19
+ - Accuracy: 0.7235
20
 
21
  ## Model description
22
 
 
35
  ### Training hyperparameters
36
 
37
  The following hyperparameters were used during training:
38
+ - learning_rate: 5e-06
39
  - train_batch_size: 4
40
  - eval_batch_size: 8
41
  - seed: 42
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7180157954865308,
4
- "eval_loss": 1.3691895008087158,
5
- "eval_runtime": 47.8255,
6
  "eval_samples": 794,
7
- "eval_samples_per_second": 16.602,
8
- "eval_steps_per_second": 2.091,
9
- "perplexity": 3.9321623891487576,
10
- "train_loss": 1.0548812925209943,
11
- "train_runtime": 7794.1827,
12
  "train_samples": 14828,
13
- "train_samples_per_second": 5.707,
14
- "train_steps_per_second": 1.427
15
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.7235127572561552,
4
+ "eval_loss": 1.3436471223831177,
5
+ "eval_runtime": 99.3125,
6
  "eval_samples": 794,
7
+ "eval_samples_per_second": 7.995,
8
+ "eval_steps_per_second": 1.007,
9
+ "perplexity": 3.8329974548301986,
10
+ "train_loss": 0.8344709603716365,
11
+ "train_runtime": 15660.631,
12
  "train_samples": 14828,
13
+ "train_samples_per_second": 2.84,
14
+ "train_steps_per_second": 0.71
15
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "ZZ99/deberta-v3-large-tapt",
3
  "architectures": [
4
  "NewDebertaV2ForMaskedLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "/root/autodl-tmp/nbme/tmp/test-mlm/deberta-v3-large-tapt",
3
  "architectures": [
4
  "NewDebertaV2ForMaskedLM"
5
  ],
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.7180157954865308,
4
- "eval_loss": 1.3691895008087158,
5
- "eval_runtime": 47.8255,
6
  "eval_samples": 794,
7
- "eval_samples_per_second": 16.602,
8
- "eval_steps_per_second": 2.091,
9
- "perplexity": 3.9321623891487576
10
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.7235127572561552,
4
+ "eval_loss": 1.3436471223831177,
5
+ "eval_runtime": 99.3125,
6
  "eval_samples": 794,
7
+ "eval_samples_per_second": 7.995,
8
+ "eval_steps_per_second": 1.007,
9
+ "perplexity": 3.8329974548301986
10
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbe4e67605ac34718bafb7684d144ba7006baf1dea461c31f4854256d610e2b7
3
  size 1740500457
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3dc582456ca4b83a1680e6ab80cd6af31b5edbff53b9f27f2d1e4ec4be9977a
3
  size 1740500457
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "vocab_type": "spm", "special_tokens_map_file": null, "name_or_path": "ZZ99/deberta-v3-large-tapt", "sp_model_kwargs": {}, "tokenizer_class": "DebertaV2Tokenizer"}
 
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "vocab_type": "spm", "special_tokens_map_file": null, "name_or_path": "/root/autodl-tmp/nbme/tmp/test-mlm/deberta-v3-large-tapt", "sp_model_kwargs": {}, "tokenizer_class": "DebertaV2Tokenizer"}
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 1.0548812925209943,
4
- "train_runtime": 7794.1827,
5
  "train_samples": 14828,
6
- "train_samples_per_second": 5.707,
7
- "train_steps_per_second": 1.427
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.8344709603716365,
4
+ "train_runtime": 15660.631,
5
  "train_samples": 14828,
6
+ "train_samples_per_second": 2.84,
7
+ "train_steps_per_second": 0.71
8
  }
trainer_state.json CHANGED
@@ -9,78 +9,78 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.27,
12
- "learning_rate": 9.100800287743908e-06,
13
- "loss": 1.1859,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.54,
18
- "learning_rate": 8.201600575487818e-06,
19
- "loss": 1.095,
20
  "step": 2000
21
  },
22
  {
23
  "epoch": 0.81,
24
- "learning_rate": 7.3024008632317245e-06,
25
- "loss": 1.0536,
26
  "step": 3000
27
  },
28
  {
29
  "epoch": 1.08,
30
- "learning_rate": 6.403201150975632e-06,
31
- "loss": 1.0394,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 1.35,
36
- "learning_rate": 5.50400143871954e-06,
37
- "loss": 1.0087,
38
  "step": 5000
39
  },
40
  {
41
  "epoch": 1.62,
42
- "learning_rate": 4.604801726463448e-06,
43
- "loss": 1.0179,
44
  "step": 6000
45
  },
46
  {
47
  "epoch": 1.89,
48
- "learning_rate": 3.705602014207356e-06,
49
- "loss": 1.0191,
50
  "step": 7000
51
  },
52
  {
53
  "epoch": 2.16,
54
- "learning_rate": 2.806402301951264e-06,
55
- "loss": 1.0278,
56
  "step": 8000
57
  },
58
  {
59
  "epoch": 2.43,
60
- "learning_rate": 1.9072025896951715e-06,
61
- "loss": 1.0345,
62
  "step": 9000
63
  },
64
  {
65
  "epoch": 2.7,
66
- "learning_rate": 1.0080028774390793e-06,
67
- "loss": 1.0502,
68
  "step": 10000
69
  },
70
  {
71
  "epoch": 2.97,
72
- "learning_rate": 1.0880316518298715e-07,
73
- "loss": 1.0697,
74
  "step": 11000
75
  },
76
  {
77
  "epoch": 3.0,
78
  "step": 11121,
79
  "total_flos": 4.147380631930061e+16,
80
- "train_loss": 1.0548812925209943,
81
- "train_runtime": 7794.1827,
82
- "train_samples_per_second": 5.707,
83
- "train_steps_per_second": 1.427
84
  }
85
  ],
86
  "max_steps": 11121,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.27,
12
+ "learning_rate": 4.550400143871954e-06,
13
+ "loss": 0.8007,
14
  "step": 1000
15
  },
16
  {
17
  "epoch": 0.54,
18
+ "learning_rate": 4.100800287743909e-06,
19
+ "loss": 0.7477,
20
  "step": 2000
21
  },
22
  {
23
  "epoch": 0.81,
24
+ "learning_rate": 3.6512004316158623e-06,
25
+ "loss": 0.7351,
26
  "step": 3000
27
  },
28
  {
29
  "epoch": 1.08,
30
+ "learning_rate": 3.201600575487816e-06,
31
+ "loss": 0.746,
32
  "step": 4000
33
  },
34
  {
35
  "epoch": 1.35,
36
+ "learning_rate": 2.75200071935977e-06,
37
+ "loss": 0.7474,
38
  "step": 5000
39
  },
40
  {
41
  "epoch": 1.62,
42
+ "learning_rate": 2.302400863231724e-06,
43
+ "loss": 0.7849,
44
  "step": 6000
45
  },
46
  {
47
  "epoch": 1.89,
48
+ "learning_rate": 1.852801007103678e-06,
49
+ "loss": 0.8197,
50
  "step": 7000
51
  },
52
  {
53
  "epoch": 2.16,
54
+ "learning_rate": 1.403201150975632e-06,
55
+ "loss": 0.8639,
56
  "step": 8000
57
  },
58
  {
59
  "epoch": 2.43,
60
+ "learning_rate": 9.536012948475857e-07,
61
+ "loss": 0.91,
62
  "step": 9000
63
  },
64
  {
65
  "epoch": 2.7,
66
+ "learning_rate": 5.040014387195397e-07,
67
+ "loss": 0.9666,
68
  "step": 10000
69
  },
70
  {
71
  "epoch": 2.97,
72
+ "learning_rate": 5.4401582591493575e-08,
73
+ "loss": 1.0304,
74
  "step": 11000
75
  },
76
  {
77
  "epoch": 3.0,
78
  "step": 11121,
79
  "total_flos": 4.147380631930061e+16,
80
+ "train_loss": 0.8344709603716365,
81
+ "train_runtime": 15660.631,
82
+ "train_samples_per_second": 2.84,
83
+ "train_steps_per_second": 0.71
84
  }
85
  ],
86
  "max_steps": 11121,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cae7a945faaf8e89a4059074b4664e2594d2de7b82e3f3e0e2278326df623d1
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8532469d3355217ccf53348dfd3e151e9edea2eb783d867db01f826efa1689d0
3
  size 3119