ewof committed on May 23, 2023

Commit

06d0b5e

•

1 Parent(s): 357a26c

clean up

Browse files

Files changed (33) hide show

README.md +8 -3
checkpoint-1152/config.json +0 -26
checkpoint-1152/generation_config.json +0 -6
checkpoint-1152/optimizer.pt +0 -3
checkpoint-1152/pytorch_model.bin +0 -3
checkpoint-1152/rng_state_0.pth +0 -3
checkpoint-1152/rng_state_1.pth +0 -3
checkpoint-1152/rng_state_2.pth +0 -3
checkpoint-1152/rng_state_3.pth +0 -3
checkpoint-1152/scheduler.pt +0 -3
checkpoint-1152/special_tokens_map.json +0 -5
checkpoint-1152/tokenizer.json +0 -0
checkpoint-1152/tokenizer_config.json +0 -10
checkpoint-1152/trainer_state.json +0 -0
checkpoint-1152/training_args.bin +0 -3
checkpoint-1280/config.json +0 -26
checkpoint-1280/generation_config.json +0 -6
checkpoint-1280/optimizer.pt +0 -3
checkpoint-1280/pytorch_model.bin +0 -3
checkpoint-1280/rng_state_0.pth +0 -3
checkpoint-1280/rng_state_1.pth +0 -3
checkpoint-1280/rng_state_2.pth +0 -3
checkpoint-1280/rng_state_3.pth +0 -3
checkpoint-1280/scheduler.pt +0 -3
checkpoint-1280/special_tokens_map.json +0 -5
checkpoint-1280/tokenizer.json +0 -0
checkpoint-1280/tokenizer_config.json +0 -10
checkpoint-1280/trainer_state.json +0 -0
checkpoint-1280/training_args.bin +0 -3
generation_config.json +0 -6
model.safetensors +0 -3
trainer_state.json +0 -0
training_args.bin +0 -3

README.md CHANGED Viewed

@@ -1,15 +1,20 @@
 ---
 datasets:
-- ewof/koishi-instruct-metharme
 ---
-trained on commit 69562cc of the linked dataset (before FLAN CoT and LLaMini were added to the dataset)
 ## Base Model
 native fine tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
 ## Prompting
 The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
 The `<|system|>` prompt can be used to inject out-of-channel information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can happen multiple times and be chained up to form a conversation history.
 ## Benchmarks
-![koishi_instruct_3b_benchmarks.png](https://s3.amazonaws.com/moonup/production/uploads/6458315aaa426bae5e066852/Mh1HDq4fIYMeY7F72uVHG.png)

 ---
 datasets:
+  - ewof/koishi-instruct-metharme
 ---
+trained on commit 33b25a5 of the linked dataset (lamini)
 ## Base Model
 native fine tune of togethercomputer/RedPajama-INCITE-Base-3B-v1
 ## Prompting
 The current model version has been trained on prompts using three different roles, which are denoted by the following tokens: `<|system|>`, `<|user|>` and `<|model|>`.
 The `<|system|>` prompt can be used to inject out-of-channel information behind the scenes, while the `<|user|>` prompt should be used to indicate user input. The `<|model|>` token should then be used to indicate that the model should generate a response. These tokens can appear multiple times and be chained together to form a conversation history.
 ## Benchmarks
+![koishi_instruct_3b_v2_benchmarks.png](https://media.discordapp.net/attachments/1108805965421228052/1110025834699427840/tDcBmTvnLLSTdOUEKyHZMfra.png)

checkpoint-1152/config.json DELETED Viewed

@@ -1,26 +0,0 @@
-{
-  "_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
-  "architectures": [
-    "GPTNeoXForCausalLM"
-  ],
-  "bos_token_id": 0,
-  "classifier_dropout": 0.1,
-  "eos_token_id": 0,
-  "hidden_act": "gelu",
-  "hidden_size": 2560,
-  "initializer_range": 0.02,
-  "intermediate_size": 10240,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 2048,
-  "model_type": "gpt_neox",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "rotary_emb_base": 10000,
-  "rotary_pct": 1.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.29.2",
-  "use_cache": true,
-  "use_parallel_residual": false,
-  "vocab_size": 50432
-}

checkpoint-1152/generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 0,
-  "eos_token_id": 0,
-  "transformers_version": "4.29.2"
-}

checkpoint-1152/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9b25b8f0a1201a309f55820005ee38578885318a62130ec1b97b989e7abdadea
-size 11103802425

checkpoint-1152/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3f475276b7157f00d019aaf75109e397bd986eb303968718156498b9a505ac9c
-size 5686115609

checkpoint-1152/rng_state_0.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4aa38064359163a436b682a76922629be0d47715cc93798f53c30584786df380
-size 17655

checkpoint-1152/rng_state_1.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:386882d88441fb270dac4c0a02c6b12d9125a96b7fa06d4456448d9d1b9da975
-size 17655

checkpoint-1152/rng_state_2.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3c2119f0f2530fd325c172a94b131c42f068b4b61340e0b39e5188ebdebae961
-size 17655

checkpoint-1152/rng_state_3.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:b265d61c7eae8db516afc3f2f413284853452f9b41a53ad49381bf25c87ed7a9
-size 17655

checkpoint-1152/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e37413fae3fcd5fe95558c91c09a103a88f23895d3432b1ae34f6a82c6f7cbb
-size 627

checkpoint-1152/special_tokens_map.json DELETED Viewed

@@ -1,5 +0,0 @@
-{
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}

checkpoint-1152/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1152/tokenizer_config.json DELETED Viewed

@@ -1,10 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 2048,
-  "padding_side": "right",
-  "tokenizer_class": "GPTNeoXTokenizer",
-  "unk_token": "<|endoftext|>"
-}

checkpoint-1152/trainer_state.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1152/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
-size 3963

checkpoint-1280/config.json DELETED Viewed

@@ -1,26 +0,0 @@
-{
-  "_name_or_path": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
-  "architectures": [
-    "GPTNeoXForCausalLM"
-  ],
-  "bos_token_id": 0,
-  "classifier_dropout": 0.1,
-  "eos_token_id": 0,
-  "hidden_act": "gelu",
-  "hidden_size": 2560,
-  "initializer_range": 0.02,
-  "intermediate_size": 10240,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 2048,
-  "model_type": "gpt_neox",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 32,
-  "rotary_emb_base": 10000,
-  "rotary_pct": 1.0,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.29.2",
-  "use_cache": true,
-  "use_parallel_residual": false,
-  "vocab_size": 50432
-}

checkpoint-1280/generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 0,
-  "eos_token_id": 0,
-  "transformers_version": "4.29.2"
-}

checkpoint-1280/optimizer.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fc188693373b13578ce9d267ef6764c0c25828da06e2718bc8232f1cb2b3258c
-size 11103802425

checkpoint-1280/pytorch_model.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c574ec6e80f7fa816231824d9516314a3e767ff2c16553b90f63c05b4525ff1f
-size 5686115609

checkpoint-1280/rng_state_0.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:0224d29557651897de6e344d78bbb9b785843ad78a49f005c99eacfe7e07dea7
-size 17655

checkpoint-1280/rng_state_1.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:15ed5ccba70f86cbd59f7bfb2752c652c43123154fc2f2e2615a588c8a797fce
-size 17655

checkpoint-1280/rng_state_2.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e25f55f214706ee17b931bb7b10e6773ce251919d45f1744d962e3fc19fc7e8
-size 17655

checkpoint-1280/rng_state_3.pth DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:7f73f25b15884c097e03156b078a6a0d9264fdc7ac3e6463c53858c1241a5820
-size 17655

checkpoint-1280/scheduler.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:edc59fa361c757fcde2a46aea7281b6c186e025947718063c5049653c82086f7
-size 627

checkpoint-1280/special_tokens_map.json DELETED Viewed

@@ -1,5 +0,0 @@
-{
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
-}

checkpoint-1280/tokenizer.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1280/tokenizer_config.json DELETED Viewed

@@ -1,10 +0,0 @@
-{
-  "add_prefix_space": false,
-  "bos_token": "<|endoftext|>",
-  "clean_up_tokenization_spaces": true,
-  "eos_token": "<|endoftext|>",
-  "model_max_length": 2048,
-  "padding_side": "right",
-  "tokenizer_class": "GPTNeoXTokenizer",
-  "unk_token": "<|endoftext|>"
-}

checkpoint-1280/trainer_state.json DELETED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1280/training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
-size 3963

generation_config.json DELETED Viewed

@@ -1,6 +0,0 @@
-{
-  "_from_model_config": true,
-  "bos_token_id": 0,
-  "eos_token_id": 0,
-  "transformers_version": "4.29.2"
-}

model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9501d11a8002a39d54b448386e3a64cdae1290a309e2f5b4c7bb0b12fde1a6e1
-size 5686008384

trainer_state.json DELETED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:8be3425ea525f1956ea7f5605ed90c71e428f1252e73ed5c2bc17a50d1e3c2e7
-size 3963