ewof committed on
Commit
06d0b5e
1 Parent(s): 357a26c
README.md CHANGED
@@ -1,15 +1,20 @@
1
  ---
2
  datasets:
3
- - ewof/koishi-instruct-metharme
4
  ---
5
- trained on commit 69562cc of the linked dataset (before FLAN CoT and LLaMini were added to the dataset)
 
 
6
  ## Base Model
 
7
  native fine tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
8
 
9
  ## Prompting
 
10
  The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
11
 
12
  The `<|system|>` prompt can be used to inject out-of-band information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can appear multiple times and be chained together to form a conversation history.
13
 
14
  ## Benchmarks
15
- ![koishi_instruct_3b_benchmarks.png](https://s3.amazonaws.com/moonup/production/uploads/6458315aaa426bae5e066852/Mh1HDq4fIYMeY7F72uVHG.png)
 
 
1
  ---
2
  datasets:
3
+ - ewof/koishi-instruct-metharme
4
  ---
5
+
6
+ trained on commit 33b25a5 of the linked dataset (lamini)
7
+
8
  ## Base Model
9
+
10
  native fine tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
11
 
12
  ## Prompting
13
+
14
  The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
15
 
16
  The `<|system|>` prompt can be used to inject out-of-band information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can appear multiple times and be chained together to form a conversation history.
17
 
18
  ## Benchmarks
19
+
20
+ ![koishi_instruct_3b_v2_benchmarks.png](https://media.discordapp.net/attachments/1108805965421228052/1110025834699427840/tDcBmTvnLLSTdOUEKyHZMfra.png)
checkpoint-1152/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
3
- "architectures": [
4
- "GPTNeoXForCausalLM"
5
- ],
6
- "bos_token_id": 0,
7
- "classifier_dropout": 0.1,
8
- "eos_token_id": 0,
9
- "hidden_act": "gelu",
10
- "hidden_size": 2560,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 10240,
13
- "layer_norm_eps": 1e-05,
14
- "max_position_embeddings": 2048,
15
- "model_type": "gpt_neox",
16
- "num_attention_heads": 32,
17
- "num_hidden_layers": 32,
18
- "rotary_emb_base": 10000,
19
- "rotary_pct": 1.0,
20
- "tie_word_embeddings": false,
21
- "torch_dtype": "bfloat16",
22
- "transformers_version": "4.29.2",
23
- "use_cache": true,
24
- "use_parallel_residual": false,
25
- "vocab_size": 50432
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1152/generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 0,
5
- "transformers_version": "4.29.2"
6
- }
 
 
 
 
 
 
 
checkpoint-1152/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b25b8f0a1201a309f55820005ee38578885318a62130ec1b97b989e7abdadea
3
- size 11103802425
 
 
 
 
checkpoint-1152/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f475276b7157f00d019aaf75109e397bd986eb303968718156498b9a505ac9c
3
- size 5686115609
 
 
 
 
checkpoint-1152/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4aa38064359163a436b682a76922629be0d47715cc93798f53c30584786df380
3
- size 17655
 
 
 
 
checkpoint-1152/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:386882d88441fb270dac4c0a02c6b12d9125a96b7fa06d4456448d9d1b9da975
3
- size 17655
 
 
 
 
checkpoint-1152/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c2119f0f2530fd325c172a94b131c42f068b4b61340e0b39e5188ebdebae961
3
- size 17655
 
 
 
 
checkpoint-1152/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b265d61c7eae8db516afc3f2f413284853452f9b41a53ad49381bf25c87ed7a9
3
- size 17655
 
 
 
 
checkpoint-1152/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e37413fae3fcd5fe95558c91c09a103a88f23895d3432b1ae34f6a82c6f7cbb
3
- size 627
 
 
 
 
checkpoint-1152/special_tokens_map.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "unk_token": "<|endoftext|>"
5
- }
 
 
 
 
 
 
checkpoint-1152/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-1152/tokenizer_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": "<|endoftext|>",
4
- "clean_up_tokenization_spaces": true,
5
- "eos_token": "<|endoftext|>",
6
- "model_max_length": 2048,
7
- "padding_side": "right",
8
- "tokenizer_class": "GPTNeoXTokenizer",
9
- "unk_token": "<|endoftext|>"
10
- }
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1152/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-1152/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
3
- size 3963
 
 
 
 
checkpoint-1280/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
3
- "architectures": [
4
- "GPTNeoXForCausalLM"
5
- ],
6
- "bos_token_id": 0,
7
- "classifier_dropout": 0.1,
8
- "eos_token_id": 0,
9
- "hidden_act": "gelu",
10
- "hidden_size": 2560,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 10240,
13
- "layer_norm_eps": 1e-05,
14
- "max_position_embeddings": 2048,
15
- "model_type": "gpt_neox",
16
- "num_attention_heads": 32,
17
- "num_hidden_layers": 32,
18
- "rotary_emb_base": 10000,
19
- "rotary_pct": 1.0,
20
- "tie_word_embeddings": false,
21
- "torch_dtype": "bfloat16",
22
- "transformers_version": "4.29.2",
23
- "use_cache": true,
24
- "use_parallel_residual": false,
25
- "vocab_size": 50432
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1280/generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 0,
5
- "transformers_version": "4.29.2"
6
- }
 
 
 
 
 
 
 
checkpoint-1280/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc188693373b13578ce9d267ef6764c0c25828da06e2718bc8232f1cb2b3258c
3
- size 11103802425
 
 
 
 
checkpoint-1280/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c574ec6e80f7fa816231824d9516314a3e767ff2c16553b90f63c05b4525ff1f
3
- size 5686115609
 
 
 
 
checkpoint-1280/rng_state_0.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0224d29557651897de6e344d78bbb9b785843ad78a49f005c99eacfe7e07dea7
3
- size 17655
 
 
 
 
checkpoint-1280/rng_state_1.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:15ed5ccba70f86cbd59f7bfb2752c652c43123154fc2f2e2615a588c8a797fce
3
- size 17655
 
 
 
 
checkpoint-1280/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e25f55f214706ee17b931bb7b10e6773ce251919d45f1744d962e3fc19fc7e8
3
- size 17655
 
 
 
 
checkpoint-1280/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f73f25b15884c097e03156b078a6a0d9264fdc7ac3e6463c53858c1241a5820
3
- size 17655
 
 
 
 
checkpoint-1280/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:edc59fa361c757fcde2a46aea7281b6c186e025947718063c5049653c82086f7
3
- size 627
 
 
 
 
checkpoint-1280/special_tokens_map.json DELETED
@@ -1,5 +0,0 @@
1
- {
2
- "bos_token": "<|endoftext|>",
3
- "eos_token": "<|endoftext|>",
4
- "unk_token": "<|endoftext|>"
5
- }
 
 
 
 
 
 
checkpoint-1280/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-1280/tokenizer_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": "<|endoftext|>",
4
- "clean_up_tokenization_spaces": true,
5
- "eos_token": "<|endoftext|>",
6
- "model_max_length": 2048,
7
- "padding_side": "right",
8
- "tokenizer_class": "GPTNeoXTokenizer",
9
- "unk_token": "<|endoftext|>"
10
- }
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1280/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-1280/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
3
- size 3963
 
 
 
 
generation_config.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "eos_token_id": 0,
5
- "transformers_version": "4.29.2"
6
- }
 
 
 
 
 
 
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9501d11a8002a39d54b448386e3a64cdae1290a309e2f5b4c7bb0b12fde1a6e1
3
- size 5686008384
 
 
 
 
trainer_state.json DELETED
The diff for this file is too large to render. See raw diff
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
3
- size 3963