nadahlberg committed on
Commit
73de8fd
1 Parent(s): 84cae2e

Model save

Browse files
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  base_model: docketanalyzer/docket-lm-xs
3
  tags:
4
  - generated_from_trainer
@@ -16,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [docketanalyzer/docket-lm-xs](https://huggingface.co/docketanalyzer/docket-lm-xs) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.0212
20
- - F1: 0.9938
21
 
22
  ## Model description
23
 
@@ -37,32 +38,48 @@ More information needed
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-05
40
- - train_batch_size: 16
41
  - eval_batch_size: 16
42
  - seed: 42
 
 
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
- - lr_scheduler_warmup_ratio: 0.02
46
- - num_epochs: 6
47
 
48
  ### Training results
49
 
50
- | Training Loss | Epoch | Step | Validation Loss | F1 |
51
- |:-------------:|:-----:|:----:|:---------------:|:------:|
52
- | 0.0301 | 0.61 | 60 | 0.0452 | 0.9847 |
53
- | 0.1998 | 1.22 | 120 | 0.0193 | 0.9969 |
54
- | 0.003 | 1.84 | 180 | 0.0187 | 0.9938 |
55
- | 0.0022 | 2.45 | 240 | 0.0159 | 0.9938 |
56
- | 0.1395 | 3.06 | 300 | 0.0303 | 0.9908 |
57
- | 0.0017 | 3.67 | 360 | 0.0253 | 0.9908 |
58
- | 0.0013 | 4.29 | 420 | 0.0249 | 0.9908 |
59
- | 0.001 | 4.9 | 480 | 0.0222 | 0.9908 |
60
- | 0.001 | 5.51 | 540 | 0.0211 | 0.9938 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
 
63
  ### Framework versions
64
 
65
- - Transformers 4.37.1
66
- - Pytorch 2.1.2+cu121
67
  - Datasets 2.14.4
68
- - Tokenizers 0.15.1
 
1
  ---
2
+ license: apache-2.0
3
  base_model: docketanalyzer/docket-lm-xs
4
  tags:
5
  - generated_from_trainer
 
17
 
18
  This model is a fine-tuned version of [docketanalyzer/docket-lm-xs](https://huggingface.co/docketanalyzer/docket-lm-xs) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0230
21
+ - F1: 0.9915
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 5e-05
41
+ - train_batch_size: 8
42
  - eval_batch_size: 16
43
  - seed: 42
44
+ - gradient_accumulation_steps: 2
45
+ - total_train_batch_size: 16
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
+ - lr_scheduler_warmup_steps: 100
49
+ - num_epochs: 1
50
 
51
  ### Training results
52
 
53
+ | Training Loss | Epoch | Step | Validation Loss | F1 |
54
+ |:-------------:|:------:|:----:|:---------------:|:------:|
55
+ | 0.0112 | 0.0418 | 300 | 0.0576 | 0.9771 |
56
+ | 0.0551 | 0.0836 | 600 | 0.0362 | 0.9857 |
57
+ | 0.2331 | 0.1254 | 900 | 0.0354 | 0.9839 |
58
+ | 0.0009 | 0.1672 | 1200 | 0.0396 | 0.9868 |
59
+ | 0.005 | 0.2090 | 1500 | 0.0526 | 0.9867 |
60
+ | 0.0948 | 0.2508 | 1800 | 0.0434 | 0.9865 |
61
+ | 0.016 | 0.2926 | 2100 | 0.0297 | 0.9876 |
62
+ | 0.0047 | 0.3344 | 2400 | 0.0394 | 0.9882 |
63
+ | 0.0007 | 0.3763 | 2700 | 0.0422 | 0.9864 |
64
+ | 0.0037 | 0.4181 | 3000 | 0.0248 | 0.9910 |
65
+ | 0.002 | 0.4599 | 3300 | 0.0271 | 0.9909 |
66
+ | 0.0005 | 0.5017 | 3600 | 0.0283 | 0.9902 |
67
+ | 0.0155 | 0.5435 | 3900 | 0.0227 | 0.9910 |
68
+ | 0.0017 | 0.5853 | 4200 | 0.0290 | 0.9907 |
69
+ | 0.0002 | 0.6271 | 4500 | 0.0264 | 0.9899 |
70
+ | 0.0051 | 0.6689 | 4800 | 0.0294 | 0.9907 |
71
+ | 0.0152 | 0.7107 | 5100 | 0.0253 | 0.9903 |
72
+ | 0.0096 | 0.7525 | 5400 | 0.0232 | 0.9909 |
73
+ | 0.1812 | 0.7943 | 5700 | 0.0295 | 0.9915 |
74
+ | 0.0007 | 0.8361 | 6000 | 0.0235 | 0.9912 |
75
+ | 0.0081 | 0.8779 | 6300 | 0.0247 | 0.9910 |
76
+ | 0.0684 | 0.9197 | 6600 | 0.0236 | 0.9905 |
77
+ | 0.0003 | 0.9615 | 6900 | 0.0230 | 0.9914 |
78
 
79
 
80
  ### Framework versions
81
 
82
+ - Transformers 4.41.1
83
+ - Pytorch 2.3.0+cu121
84
  - Datasets 2.14.4
85
+ - Tokenizers 0.19.1
config.json CHANGED
@@ -37,7 +37,7 @@
37
  "relative_attention": true,
38
  "share_att_key": true,
39
  "torch_dtype": "float32",
40
- "transformers_version": "4.37.1",
41
  "type_vocab_size": 0,
42
  "vocab_size": 128100
43
  }
 
37
  "relative_attention": true,
38
  "share_att_key": true,
39
  "torch_dtype": "float32",
40
+ "transformers_version": "4.41.1",
41
  "type_vocab_size": 0,
42
  "vocab_size": 128100
43
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "eval_loss": 0.021180542185902596,
3
- "eval_f1": 0.9938271604938271,
4
- "eval_runtime": 1.2119,
5
- "eval_samples_per_second": 320.974,
6
- "eval_steps_per_second": 20.628,
7
- "epoch": 6.0
8
  }
 
1
  {
2
+ "eval_loss": 0.02304094284772873,
3
+ "eval_f1": 0.9915240152900117,
4
+ "eval_runtime": 20.4919,
5
+ "eval_samples_per_second": 294.848,
6
+ "eval_steps_per_second": 18.446,
7
+ "epoch": 1.0
8
  }
logs/events.out.tfevents.1721409006.nathan.71286.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18367122e39cb518947b6721477cabc8f895a2d188ff855c19f54389723e2a6a
3
+ size 769622
logs/events.out.tfevents.1721410911.nathan.71286.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b4a89b1f0c4af0841718abb2ef789cd0207203d04ab0115e27d3cef3154b422
3
+ size 405
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c78d75a907a27d97bc87d6cbd50ed9d784bdb707cf07239fd9e81fc382a53ea3
3
  size 283347432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ac8f7621a8705102a81330b9a78f1b7b5db3a62768d9bf025d129ecc4655242
3
  size 283347432
run_config.json CHANGED
@@ -4,18 +4,18 @@
4
  "max_length": 256
5
  },
6
  "training_args": {
7
- "num_train_epochs": 6,
8
- "per_device_train_batch_size": 16,
9
  "per_device_eval_batch_size": 16,
10
- "gradient_accumulation_steps": 1,
11
  "learning_rate": 5e-05,
12
  "weight_decay": 0.1,
13
- "warmup_ratio": 0.02,
14
  "evaluation_strategy": "steps",
15
- "eval_steps": 60,
16
- "save_steps": 60,
17
- "save_total_limit": 1
18
  },
19
- "run_name": "complaint",
20
  "run_type": "ClassificationRoutine"
21
  }
 
4
  "max_length": 256
5
  },
6
  "training_args": {
7
+ "num_train_epochs": 1,
8
+ "per_device_train_batch_size": 8,
9
  "per_device_eval_batch_size": 16,
10
+ "gradient_accumulation_steps": 2,
11
  "learning_rate": 5e-05,
12
  "weight_decay": 0.1,
13
+ "warmup_steps": 100,
14
  "evaluation_strategy": "steps",
15
+ "eval_steps": 300,
16
+ "save_steps": 300,
17
+ "save_total_limit": 2
18
  },
19
+ "run_name": "initial_model",
20
  "run_type": "ClassificationRoutine"
21
  }
tokenizer.json CHANGED
@@ -90,8 +90,8 @@
90
  {
91
  "type": "Metaspace",
92
  "replacement": "▁",
93
- "add_prefix_space": true,
94
- "prepend_scheme": "always"
95
  }
96
  ]
97
  },
@@ -173,8 +173,8 @@
173
  "decoder": {
174
  "type": "Metaspace",
175
  "replacement": "▁",
176
- "add_prefix_space": true,
177
- "prepend_scheme": "always"
178
  },
179
  "model": {
180
  "type": "Unigram",
 
90
  {
91
  "type": "Metaspace",
92
  "replacement": "▁",
93
+ "prepend_scheme": "always",
94
+ "split": true
95
  }
96
  ]
97
  },
 
173
  "decoder": {
174
  "type": "Metaspace",
175
  "replacement": "▁",
176
+ "prepend_scheme": "always",
177
+ "split": true
178
  },
179
  "model": {
180
  "type": "Unigram",
tokenizer_config.json CHANGED
@@ -47,16 +47,12 @@
47
  "do_lower_case": false,
48
  "eos_token": "[SEP]",
49
  "mask_token": "[MASK]",
50
- "max_length": 256,
51
  "model_max_length": 1000000000000000019884624838656,
52
  "pad_token": "[PAD]",
53
  "sep_token": "[SEP]",
54
  "sp_model_kwargs": {},
55
  "split_by_punct": false,
56
- "stride": 0,
57
  "tokenizer_class": "DebertaV2Tokenizer",
58
- "truncation_side": "right",
59
- "truncation_strategy": "longest_first",
60
  "unk_token": "[UNK]",
61
  "vocab_type": "spm"
62
  }
 
47
  "do_lower_case": false,
48
  "eos_token": "[SEP]",
49
  "mask_token": "[MASK]",
 
50
  "model_max_length": 1000000000000000019884624838656,
51
  "pad_token": "[PAD]",
52
  "sep_token": "[SEP]",
53
  "sp_model_kwargs": {},
54
  "split_by_punct": false,
 
55
  "tokenizer_class": "DebertaV2Tokenizer",
 
 
56
  "unk_token": "[UNK]",
57
  "vocab_type": "spm"
58
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:257fe51d7cf53fa40026935e0b9ddc3ee3878866616085e360f7c39fa63f0239
3
- size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3656fcbc1832dd9c0867ba00d4106f7f2d6ac5ab8dfe75730fee3152c51822ce
3
+ size 5112