nadahlberg
committed on
Commit
•
73de8fd
1
Parent(s):
84cae2e
Model save
Browse files
- README.md +36 -19
- config.json +1 -1
- eval_results.json +6 -6
- logs/events.out.tfevents.1721409006.nathan.71286.4 +3 -0
- logs/events.out.tfevents.1721410911.nathan.71286.5 +3 -0
- model.safetensors +1 -1
- run_config.json +8 -8
- tokenizer.json +4 -4
- tokenizer_config.json +0 -4
- training_args.bin +2 -2
README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
---
|
|
|
2 |
base_model: docketanalyzer/docket-lm-xs
|
3 |
tags:
|
4 |
- generated_from_trainer
|
@@ -16,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
This model is a fine-tuned version of [docketanalyzer/docket-lm-xs](https://huggingface.co/docketanalyzer/docket-lm-xs) on the None dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
-
- Loss: 0.
|
20 |
-
- F1: 0.
|
21 |
|
22 |
## Model description
|
23 |
|
@@ -37,32 +38,48 @@ More information needed
|
|
37 |
|
38 |
The following hyperparameters were used during training:
|
39 |
- learning_rate: 5e-05
|
40 |
-
- train_batch_size:
|
41 |
- eval_batch_size: 16
|
42 |
- seed: 42
|
|
|
|
|
43 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
44 |
- lr_scheduler_type: linear
|
45 |
-
-
|
46 |
-
- num_epochs:
|
47 |
|
48 |
### Training results
|
49 |
|
50 |
-
| Training Loss | Epoch
|
51 |
-
|
52 |
-
| 0.
|
53 |
-
| 0.
|
54 |
-
| 0.
|
55 |
-
| 0.
|
56 |
-
| 0.
|
57 |
-
| 0.
|
58 |
-
| 0.
|
59 |
-
| 0.
|
60 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
|
63 |
### Framework versions
|
64 |
|
65 |
-
- Transformers 4.
|
66 |
-
- Pytorch 2.
|
67 |
- Datasets 2.14.4
|
68 |
-
- Tokenizers 0.
|
|
|
1 |
---
|
2 |
+
license: apache-2.0
|
3 |
base_model: docketanalyzer/docket-lm-xs
|
4 |
tags:
|
5 |
- generated_from_trainer
|
|
|
17 |
|
18 |
This model is a fine-tuned version of [docketanalyzer/docket-lm-xs](https://huggingface.co/docketanalyzer/docket-lm-xs) on the None dataset.
|
19 |
It achieves the following results on the evaluation set:
|
20 |
+
- Loss: 0.0230
|
21 |
+
- F1: 0.9915
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
38 |
|
39 |
The following hyperparameters were used during training:
|
40 |
- learning_rate: 5e-05
|
41 |
+
- train_batch_size: 8
|
42 |
- eval_batch_size: 16
|
43 |
- seed: 42
|
44 |
+
- gradient_accumulation_steps: 2
|
45 |
+
- total_train_batch_size: 16
|
46 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
- lr_scheduler_type: linear
|
48 |
+
- lr_scheduler_warmup_steps: 100
|
49 |
+
- num_epochs: 1
|
50 |
|
51 |
### Training results
|
52 |
|
53 |
+
| Training Loss | Epoch | Step | Validation Loss | F1 |
|
54 |
+
|:-------------:|:------:|:----:|:---------------:|:------:|
|
55 |
+
| 0.0112 | 0.0418 | 300 | 0.0576 | 0.9771 |
|
56 |
+
| 0.0551 | 0.0836 | 600 | 0.0362 | 0.9857 |
|
57 |
+
| 0.2331 | 0.1254 | 900 | 0.0354 | 0.9839 |
|
58 |
+
| 0.0009 | 0.1672 | 1200 | 0.0396 | 0.9868 |
|
59 |
+
| 0.005 | 0.2090 | 1500 | 0.0526 | 0.9867 |
|
60 |
+
| 0.0948 | 0.2508 | 1800 | 0.0434 | 0.9865 |
|
61 |
+
| 0.016 | 0.2926 | 2100 | 0.0297 | 0.9876 |
|
62 |
+
| 0.0047 | 0.3344 | 2400 | 0.0394 | 0.9882 |
|
63 |
+
| 0.0007 | 0.3763 | 2700 | 0.0422 | 0.9864 |
|
64 |
+
| 0.0037 | 0.4181 | 3000 | 0.0248 | 0.9910 |
|
65 |
+
| 0.002 | 0.4599 | 3300 | 0.0271 | 0.9909 |
|
66 |
+
| 0.0005 | 0.5017 | 3600 | 0.0283 | 0.9902 |
|
67 |
+
| 0.0155 | 0.5435 | 3900 | 0.0227 | 0.9910 |
|
68 |
+
| 0.0017 | 0.5853 | 4200 | 0.0290 | 0.9907 |
|
69 |
+
| 0.0002 | 0.6271 | 4500 | 0.0264 | 0.9899 |
|
70 |
+
| 0.0051 | 0.6689 | 4800 | 0.0294 | 0.9907 |
|
71 |
+
| 0.0152 | 0.7107 | 5100 | 0.0253 | 0.9903 |
|
72 |
+
| 0.0096 | 0.7525 | 5400 | 0.0232 | 0.9909 |
|
73 |
+
| 0.1812 | 0.7943 | 5700 | 0.0295 | 0.9915 |
|
74 |
+
| 0.0007 | 0.8361 | 6000 | 0.0235 | 0.9912 |
|
75 |
+
| 0.0081 | 0.8779 | 6300 | 0.0247 | 0.9910 |
|
76 |
+
| 0.0684 | 0.9197 | 6600 | 0.0236 | 0.9905 |
|
77 |
+
| 0.0003 | 0.9615 | 6900 | 0.0230 | 0.9914 |
|
78 |
|
79 |
|
80 |
### Framework versions
|
81 |
|
82 |
+
- Transformers 4.41.1
|
83 |
+
- Pytorch 2.3.0+cu121
|
84 |
- Datasets 2.14.4
|
85 |
+
- Tokenizers 0.19.1
|
config.json
CHANGED
@@ -37,7 +37,7 @@
|
|
37 |
"relative_attention": true,
|
38 |
"share_att_key": true,
|
39 |
"torch_dtype": "float32",
|
40 |
-
"transformers_version": "4.
|
41 |
"type_vocab_size": 0,
|
42 |
"vocab_size": 128100
|
43 |
}
|
|
|
37 |
"relative_attention": true,
|
38 |
"share_att_key": true,
|
39 |
"torch_dtype": "float32",
|
40 |
+
"transformers_version": "4.41.1",
|
41 |
"type_vocab_size": 0,
|
42 |
"vocab_size": 128100
|
43 |
}
|
eval_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"eval_loss": 0.
|
3 |
-
"eval_f1": 0.
|
4 |
-
"eval_runtime":
|
5 |
-
"eval_samples_per_second":
|
6 |
-
"eval_steps_per_second":
|
7 |
-
"epoch":
|
8 |
}
|
|
|
1 |
{
|
2 |
+
"eval_loss": 0.02304094284772873,
|
3 |
+
"eval_f1": 0.9915240152900117,
|
4 |
+
"eval_runtime": 20.4919,
|
5 |
+
"eval_samples_per_second": 294.848,
|
6 |
+
"eval_steps_per_second": 18.446,
|
7 |
+
"epoch": 1.0
|
8 |
}
|
logs/events.out.tfevents.1721409006.nathan.71286.4
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18367122e39cb518947b6721477cabc8f895a2d188ff855c19f54389723e2a6a
|
3 |
+
size 769622
|
logs/events.out.tfevents.1721410911.nathan.71286.5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b4a89b1f0c4af0841718abb2ef789cd0207203d04ab0115e27d3cef3154b422
|
3 |
+
size 405
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 283347432
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ac8f7621a8705102a81330b9a78f1b7b5db3a62768d9bf025d129ecc4655242
|
3 |
size 283347432
|
run_config.json
CHANGED
@@ -4,18 +4,18 @@
|
|
4 |
"max_length": 256
|
5 |
},
|
6 |
"training_args": {
|
7 |
-
"num_train_epochs":
|
8 |
-
"per_device_train_batch_size":
|
9 |
"per_device_eval_batch_size": 16,
|
10 |
-
"gradient_accumulation_steps":
|
11 |
"learning_rate": 5e-05,
|
12 |
"weight_decay": 0.1,
|
13 |
-
"
|
14 |
"evaluation_strategy": "steps",
|
15 |
-
"eval_steps":
|
16 |
-
"save_steps":
|
17 |
-
"save_total_limit":
|
18 |
},
|
19 |
-
"run_name": "
|
20 |
"run_type": "ClassificationRoutine"
|
21 |
}
|
|
|
4 |
"max_length": 256
|
5 |
},
|
6 |
"training_args": {
|
7 |
+
"num_train_epochs": 1,
|
8 |
+
"per_device_train_batch_size": 8,
|
9 |
"per_device_eval_batch_size": 16,
|
10 |
+
"gradient_accumulation_steps": 2,
|
11 |
"learning_rate": 5e-05,
|
12 |
"weight_decay": 0.1,
|
13 |
+
"warmup_steps": 100,
|
14 |
"evaluation_strategy": "steps",
|
15 |
+
"eval_steps": 300,
|
16 |
+
"save_steps": 300,
|
17 |
+
"save_total_limit": 2
|
18 |
},
|
19 |
+
"run_name": "initial_model",
|
20 |
"run_type": "ClassificationRoutine"
|
21 |
}
|
tokenizer.json
CHANGED
@@ -90,8 +90,8 @@
|
|
90 |
{
|
91 |
"type": "Metaspace",
|
92 |
"replacement": "▁",
|
93 |
-
"
|
94 |
-
"
|
95 |
}
|
96 |
]
|
97 |
},
|
@@ -173,8 +173,8 @@
|
|
173 |
"decoder": {
|
174 |
"type": "Metaspace",
|
175 |
"replacement": "▁",
|
176 |
-
"
|
177 |
-
"
|
178 |
},
|
179 |
"model": {
|
180 |
"type": "Unigram",
|
|
|
90 |
{
|
91 |
"type": "Metaspace",
|
92 |
"replacement": "▁",
|
93 |
+
"prepend_scheme": "always",
|
94 |
+
"split": true
|
95 |
}
|
96 |
]
|
97 |
},
|
|
|
173 |
"decoder": {
|
174 |
"type": "Metaspace",
|
175 |
"replacement": "▁",
|
176 |
+
"prepend_scheme": "always",
|
177 |
+
"split": true
|
178 |
},
|
179 |
"model": {
|
180 |
"type": "Unigram",
|
tokenizer_config.json
CHANGED
@@ -47,16 +47,12 @@
|
|
47 |
"do_lower_case": false,
|
48 |
"eos_token": "[SEP]",
|
49 |
"mask_token": "[MASK]",
|
50 |
-
"max_length": 256,
|
51 |
"model_max_length": 1000000000000000019884624838656,
|
52 |
"pad_token": "[PAD]",
|
53 |
"sep_token": "[SEP]",
|
54 |
"sp_model_kwargs": {},
|
55 |
"split_by_punct": false,
|
56 |
-
"stride": 0,
|
57 |
"tokenizer_class": "DebertaV2Tokenizer",
|
58 |
-
"truncation_side": "right",
|
59 |
-
"truncation_strategy": "longest_first",
|
60 |
"unk_token": "[UNK]",
|
61 |
"vocab_type": "spm"
|
62 |
}
|
|
|
47 |
"do_lower_case": false,
|
48 |
"eos_token": "[SEP]",
|
49 |
"mask_token": "[MASK]",
|
|
|
50 |
"model_max_length": 1000000000000000019884624838656,
|
51 |
"pad_token": "[PAD]",
|
52 |
"sep_token": "[SEP]",
|
53 |
"sp_model_kwargs": {},
|
54 |
"split_by_punct": false,
|
|
|
55 |
"tokenizer_class": "DebertaV2Tokenizer",
|
|
|
|
|
56 |
"unk_token": "[UNK]",
|
57 |
"vocab_type": "spm"
|
58 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3656fcbc1832dd9c0867ba00d4106f7f2d6ac5ab8dfe75730fee3152c51822ce
|
3 |
+
size 5112
|