diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8de4eaf4fb84698ba6cee6e468e1658274da6a2 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "alpindale/WizardLM-2-8x22B", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 65536, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.45.0.dev0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5d0dd04cdb8df77be89256e848431284ed853f2a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.45.0.dev0" +} diff --git a/model-00001-of-00059.safetensors b/model-00001-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4b1e55b16b54451f62033febc81e9e6107c462b --- /dev/null +++ b/model-00001-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7d3d5d04aa64ba71d25d250f327c727c74c41a2bfe2059473d5da08ae0562f +size 4998663696 diff --git a/model-00002-of-00059.safetensors b/model-00002-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d410db7242f88d81a7d1b2f1a175d1546aebca5 --- /dev/null +++ b/model-00002-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f291ecb2ca6bda09f238b552d94df1de8bde2573873fa34ed4b6db825044a7 +size 4806799120 diff --git a/model-00003-of-00059.safetensors b/model-00003-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f66c9b2d86b61f5b09ff13b8eaa4715161ed7fe --- /dev/null +++ b/model-00003-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bad12c7affc5bc8fa540e18f1e215473f7bc178fd037bbabc87fd0a1e5cb2eb +size 4806799120 diff --git a/model-00004-of-00059.safetensors b/model-00004-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6bbb1321a10897e0dbc56c375e5860981ac4afc --- /dev/null +++ b/model-00004-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620fe0f510fa465bc06946c275673640e341ce7ea7912ecaffabd7afed88e544 +size 4806799120 diff --git a/model-00005-of-00059.safetensors b/model-00005-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10c392ee7de4cf9f0ffa727dcb1a997066c57d5d --- /dev/null +++ b/model-00005-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d552b8f44a2d0fc8956e60a2a33cf0679954158f9546b6c14425377d09eb8e +size 4806799120 diff --git a/model-00006-of-00059.safetensors b/model-00006-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b846e78959b9c51926efa68a59dc171590b72d59 --- /dev/null +++ b/model-00006-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398e1bfda69967ada4d27ff86001a05817fd9d48fc4d97b872e37bcad099e608 +size 4806799120 diff --git a/model-00007-of-00059.safetensors b/model-00007-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a762117dd3cf0ad2091c83ab72c2640a8dacb3c --- /dev/null +++ b/model-00007-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f65f83fd01d575921e79232d0cd7c43130339b0f057a34894b2e2f2ea229d4 +size 4806799120 diff --git a/model-00008-of-00059.safetensors b/model-00008-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9547d053d7b865e2c647120c53d1db632ef22b43 --- /dev/null +++ b/model-00008-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7eef0aced50f483e3977002608dbe69742e50083a2223ef02a893efef2e1fc1 +size 4806799120 diff --git a/model-00009-of-00059.safetensors b/model-00009-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9293d0aa6a6216cc987f9926046156a62d28623 --- /dev/null +++ b/model-00009-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab2455033a4a602265b276eb3d201ab446eab1f21aafa45971c84357ab858aaf +size 4806799120 diff --git a/model-00010-of-00059.safetensors b/model-00010-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b87887bcb8526d11921aa6fe3cd36ba18b07b6da --- /dev/null +++ b/model-00010-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6a1ae7c53dd1ab4785435403c78f5c195619dbeae62dd59a9a3b8256d49db4 +size 4806799120 diff --git a/model-00011-of-00059.safetensors b/model-00011-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d705d0796178b46492f26d1cb4e68875c053976d --- /dev/null +++ b/model-00011-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da7b3b0f4ff5c5b240d412424b9df5a4bd12dfb77847b84987d51204e682a407 +size 4806799136 diff --git a/model-00012-of-00059.safetensors b/model-00012-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..982a1eb62241bf20b4ea358a06f132c914a4c820 --- /dev/null +++ b/model-00012-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893b06d532e9c451e08bafa8e9f54776a0d5f939806108468e459ea9f6999552 +size 4806799152 diff --git a/model-00013-of-00059.safetensors b/model-00013-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a747738e2b29830e65ae8035876b90df8c952b05 --- /dev/null +++ b/model-00013-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a59f9785587ced21ef498f73d20b26334b24b1701978fdb616c8293e122b4ca +size 4806799152 diff --git a/model-00014-of-00059.safetensors b/model-00014-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e250b01edaaacb1751ea10ef41475a1410d4a51d --- /dev/null +++ b/model-00014-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1cb9d52f4f4ab5f3740f7a82acc7c82b5ed5b757f9cad1abb0ad4350cf9f41e +size 4806799152 diff --git a/model-00015-of-00059.safetensors b/model-00015-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48257cfec2056a93be7082e0d4e6e58b0919711f --- /dev/null +++ b/model-00015-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efbbf3c0039a1ebf7e26aab96e776c540e886533ba746568eab15f96ce44ee38 +size 4806799152 diff --git a/model-00016-of-00059.safetensors b/model-00016-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..404714ebc39a66f1ff204da8a844d42852c922a5 --- /dev/null +++ b/model-00016-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9819d0aa6cdfabad7b86c42ff9777a674f969019f4fe2aaa6a9e98e2929c1637 +size 4806799152 diff --git a/model-00017-of-00059.safetensors b/model-00017-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..012dbd70a026ca7003b24d03868212fb151274fd --- /dev/null +++ b/model-00017-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67fdf0216749a459671dbdfefa6c8beee468acd78ada1e952d563aeacc8fec8b +size 4806799152 diff --git a/model-00018-of-00059.safetensors b/model-00018-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffaf78ce9c1466b64758dad29d4ae1a3d9d5ba98 --- /dev/null +++ b/model-00018-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:935812c05f6aa6e36c015b7ed675d72e81bc2d4f95a09e94bca7c5593b1b7955 +size 4806799152 diff --git a/model-00019-of-00059.safetensors b/model-00019-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec983f085b7fcc3d7a4415b3d3f7e5e139a8e778 --- /dev/null +++ b/model-00019-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a31cd3be7d421893dd586fd8220a7be95af0d58a9d14d1f395f40531eff0a10 +size 4806799152 diff --git a/model-00020-of-00059.safetensors b/model-00020-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80dd0c9e0b0b0fd3552823cf318d9226cf5a14e5 --- /dev/null +++ b/model-00020-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e0d6f28661de4ad8ba8e6e0bf3f4bbe9565ffbff703af265b66c04b0baf69c +size 4806799152 diff --git a/model-00021-of-00059.safetensors b/model-00021-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddbe15feceeac19fe4a8ea91be7475e5384cd0d9 --- /dev/null +++ b/model-00021-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd1595aac673b69ee361fe700b999896862f792d5630659649a572e1bb43711 +size 4806799152 diff --git a/model-00022-of-00059.safetensors b/model-00022-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..988152aa0c789b4984857aab5c00baecfec2e233 --- /dev/null +++ b/model-00022-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5f1a40a2942e0cd72775397947feb7bddbda182985eaf79daa90da9c0e20e7 +size 4806799152 diff --git a/model-00023-of-00059.safetensors b/model-00023-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bec6647e6bfe0fa97d380bd392006a38eee4c55 --- /dev/null +++ b/model-00023-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd53346c96de33ccb9387f4b173c34004512226c34c06323ef0fe7578877d8d +size 4806799152 diff --git a/model-00024-of-00059.safetensors b/model-00024-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c8624dee80339bf917bec9ffd19a01a94d0b674 --- /dev/null +++ b/model-00024-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33a12a9db9323cc44b0a3101f461da26cd23e0721c15f0351349632be5ff1e36 +size 4932529864 diff --git a/model-00025-of-00059.safetensors b/model-00025-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ab10ef4747689ee014d8b8b3250b35fa1ecdd20 --- /dev/null +++ b/model-00025-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1d4ecbad71ff079612a1c5fb0e7ea1f98b712f1bcced164f6b53c580c3dcdf4 +size 4995542848 diff --git a/model-00026-of-00059.safetensors b/model-00026-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9f8df1514382542568c8e81a0e0e1643f058628 --- /dev/null +++ b/model-00026-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e83a92085f9325957d80aab4c2b27d7519e637b62a9e2591a91526e3924b6e1c +size 4995542848 diff --git a/model-00027-of-00059.safetensors b/model-00027-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17e78d140e850eb7925fc6da38c77f3b7be91821 --- /dev/null +++ b/model-00027-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97900e8e9522545497945fc89d5f488ce70869c30e32f286236b2c46b6d36249 +size 4932628288 diff --git a/model-00028-of-00059.safetensors b/model-00028-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e29379029b65742363dbf5cece13fb45b564d26c --- /dev/null +++ b/model-00028-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49156a048d247a4aec7b8f7ea8ec497c836edd0c5b599a37a5c606806dfc9ae0 +size 4806774344 diff --git a/model-00029-of-00059.safetensors b/model-00029-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4e8405f67e236b0a38f8202800c8de2a340166d --- /dev/null +++ b/model-00029-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f120d5f53081bc8ce8339e5aaaae226f00eec5909414c296f16dd42a0c3865c8 +size 4806799144 diff --git a/model-00030-of-00059.safetensors b/model-00030-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4240a349e28d385a4b92d2e5dc61867908bb4f0d --- /dev/null +++ b/model-00030-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73d242b90c426bc66dfe827b441deb73d68d86fbfd55a9370a1ddb6aa769c9e +size 4806799144 diff --git a/model-00031-of-00059.safetensors b/model-00031-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bba7286d6b8907758a42bb0a3214c9ff87ce7af0 --- /dev/null +++ b/model-00031-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83030c28c568a05b5dd8b17e1bd870672921acf1e3ff6a4978e8f336db382aff +size 4806799144 diff --git a/model-00032-of-00059.safetensors b/model-00032-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc7fb1ea471278a5c02e76c8c9cf261318b039e4 --- /dev/null +++ b/model-00032-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4389108be53523985433dcd171b7e22fe9e51ee7add36be8c4df100b5ad4eed1 +size 4806799144 diff --git a/model-00033-of-00059.safetensors b/model-00033-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93286cc975a7742d205540f4d692e7a44ac89b3f --- /dev/null +++ b/model-00033-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c131596bfd971e605de7aed55495bf9be723a159e03d619c091738df46edbea +size 4806799152 diff --git a/model-00034-of-00059.safetensors b/model-00034-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6b879afdb1c262b145be20f407e883e378f61d1 --- /dev/null +++ b/model-00034-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee114be8ba934f9aced075356dbde02b7bcd5adc36cb6ab0c8de35b343a42577 +size 4806799152 diff --git a/model-00035-of-00059.safetensors b/model-00035-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43afc3735eba0a480208e9cec64a7dca92462e8b --- /dev/null +++ b/model-00035-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c723d67bdedb824af4d246f122cc6e14c645ef1bb7960fd8c25d02f596beb18 +size 4806799152 diff --git a/model-00036-of-00059.safetensors b/model-00036-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01a61dc7ddd71ef816f33ed0a841c130f4633f1e --- /dev/null +++ b/model-00036-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508228cda4960dac196b058125cfbd8fc50a50520e42174680a532fea441e496 +size 4806799152 diff --git a/model-00037-of-00059.safetensors b/model-00037-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e31adfc5ba482df1c708c09fa3d7c49f2bef3e5 --- /dev/null +++ b/model-00037-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d145a7fbf7cedf87fb1c17cddd0a740399777bed27d6861fb0bb1de12b54a05c +size 4806799152 diff --git a/model-00038-of-00059.safetensors b/model-00038-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55bc159b658b8621c0afbe076c844899dd183aa8 --- /dev/null +++ b/model-00038-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d801e05a669b53971cf6a69e058c6c5d9e59bf7b399195ab89421f12f84b85 +size 4806799152 diff --git a/model-00039-of-00059.safetensors b/model-00039-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a8d346015954226eecd9d4175fc740c79b81793 --- /dev/null +++ b/model-00039-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1249dee208fb3fded28c70c825e7bc9e3fa7cfc91f38139cf265b73fdaf3bd4 +size 4806799152 diff --git a/model-00040-of-00059.safetensors b/model-00040-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..05ed11f86eb54f9bb89e36b3472322478043a5cc --- /dev/null +++ b/model-00040-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cf573512843fd554388c2b451ae05fdde0ce35746ebcacda6fae7d9e5e3f7a +size 4806799152 diff --git a/model-00041-of-00059.safetensors b/model-00041-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6fe8017e7bbf8a16a2c0b8a3fda5a3bd6cb694be --- /dev/null +++ b/model-00041-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4656908e119cd5fc1898b3547e28778e3b6b21108d2d1de0a22aa458aa8d5ee1 +size 4806799152 diff --git a/model-00042-of-00059.safetensors b/model-00042-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1c15fa26d60eeeeb40fd82c39a6308ae865b98f --- /dev/null +++ b/model-00042-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d13e13fb51c1934eed90f40bf310d5d430d3fe9ece7fe0557a15daf94407a8f +size 4806799152 diff --git a/model-00043-of-00059.safetensors b/model-00043-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b72dd7516910cf13c133b95d0c04e0e2a1b6f371 --- /dev/null +++ b/model-00043-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b42523511ba6fcf7231469b15bae6af3cdccd0ac5f6a50bc780dc2321c7258e +size 4806799152 diff --git a/model-00044-of-00059.safetensors b/model-00044-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4200f219e152fa76c82f234123b158f955cc393a --- /dev/null +++ b/model-00044-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e723e9cdc7d276df62775929952b1b25b38c21b75a63b4beb0a6d02cd617e3d9 +size 4806799152 diff --git a/model-00045-of-00059.safetensors b/model-00045-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f45179f7fc4d7604ab51c32866ac5130ae9e430d --- /dev/null +++ b/model-00045-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c14d1c87da6128abfb084937d2d11b04925ea133bd28b3adb114d11a33ceb3 +size 4806799152 diff --git a/model-00046-of-00059.safetensors b/model-00046-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7032e5a7101b0fa21a0a6af5e7313a3fae150337 --- /dev/null +++ b/model-00046-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6923f08ff8247b9755da04268038025ddb4a1eef8fe50caf8c78e5b0ec17547 +size 4806799152 diff --git a/model-00047-of-00059.safetensors b/model-00047-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2954220391752e24704837393602b8e4a65d8b5f --- /dev/null +++ b/model-00047-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18be23ebc7836331f3692e252a34a78aa003c952437b8ba483738bcc0d08371a +size 4806799152 diff --git a/model-00048-of-00059.safetensors b/model-00048-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9775bd304371edfba80c68459c9c283e3c6ee17b --- /dev/null +++ b/model-00048-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ef6a2d7efaf15246159ee851786cb270a7b78f49c1878dcda74619a5bd8b87 +size 4806799152 diff --git a/model-00049-of-00059.safetensors b/model-00049-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd6f255dee46783626c062ca535446b720ec8259 --- /dev/null +++ b/model-00049-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd008956b3e4c77cd755139afe5c4ec37a63e7db5c84feafc71e88835310e507 +size 4806799152 diff --git a/model-00050-of-00059.safetensors b/model-00050-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0dda087abd6c57e53460832aac90fce6bf7fd4de --- /dev/null +++ b/model-00050-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7eb425d2d6133cde5be41b263138e1efbfad682fd56b5ff174ea2173bad9e4 +size 4806799152 diff --git a/model-00051-of-00059.safetensors b/model-00051-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed51a57fc5fd989eac585123de9b9ed08332f811 --- /dev/null +++ b/model-00051-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b082bd764f1e99de20b9eb059ff5a9079f70520319d9e3b1ff564b552af18324 +size 4806799152 diff --git a/model-00052-of-00059.safetensors b/model-00052-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e956c6fb8f347cd9cde046a8e125f051da572727 --- /dev/null +++ b/model-00052-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de3c7d0077e9b198308ac6ef6f57a1f9d84446a68e09a4929d7446d3f668e24b +size 4932529864 diff --git a/model-00053-of-00059.safetensors b/model-00053-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..79ebaeeb065152f8eb12144f14a87a97f5f52f1a --- /dev/null +++ b/model-00053-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb113ec4883a5a66329bda3abacdda57baa56c7585d1b4bdce168763c63e3582 +size 4995542848 diff --git a/model-00054-of-00059.safetensors b/model-00054-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..83217d858025f82201937798ceff8f62ce299ee2 --- /dev/null +++ b/model-00054-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:266264b68c5a1447a01c40ae4673cd7194ec262abdf1e40b703785f260e9fc3a +size 4995542848 diff --git a/model-00055-of-00059.safetensors b/model-00055-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12e9e8d2f66d4a29d6eb9261bef760ccf14aa20f --- /dev/null +++ b/model-00055-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d2c796c64e5e3df6022e9b7dd06c220c1d8645da60ccac68762ea53f2c3c31 +size 4932628288 diff --git a/model-00056-of-00059.safetensors b/model-00056-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9f338ccfd75af81bf5066a7867bb18c4c5e320c --- /dev/null +++ b/model-00056-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87e791c107c68821450316a8ed07d1949a942f77686f2bb466028198fed02e63 +size 4806774344 diff --git a/model-00057-of-00059.safetensors b/model-00057-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0cc6a272d3b7d4195ac38f9fa1fe205313edec8 --- /dev/null +++ b/model-00057-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f2ce018efb32fcc79ef0a3646b560fc06e0cfd02cbe1f83c149ce39ad490d25 +size 4806799144 diff --git a/model-00058-of-00059.safetensors b/model-00058-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59ba51605435a11e0575c9bd3a57721e6db82391 --- /dev/null +++ b/model-00058-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15aa2bdab964fda786fd56d233dba306570f5fb77f0b632f8cd408b02958ad89 +size 4806799144 diff --git a/model-00059-of-00059.safetensors b/model-00059-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6a1a92ea4b485bafd31a1303aa4fe68aaff54052 --- /dev/null +++ b/model-00059-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d252e46ddc98799062ead170128a24260ea63b53144ae38da2ec816251ab099 +size 997233472 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..805c18819ee3c4a51164b634df4f4033c74583ad --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1746 @@ +{ + "metadata": { + "total_size": 281241268224 + }, + "weight_map": { + "lm_head.weight": "model-00059-of-00059.safetensors", + "model.embed_tokens.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00059.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00059.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00011-of-00059.safetensors", + "model.layers.10.input_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00059.safetensors", + "model.layers.11.input_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00013-of-00059.safetensors", + "model.layers.12.input_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00014-of-00059.safetensors", + "model.layers.13.input_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00015-of-00059.safetensors", + "model.layers.14.input_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00016-of-00059.safetensors", + "model.layers.15.input_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00017-of-00059.safetensors", + "model.layers.16.input_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00018-of-00059.safetensors", + "model.layers.17.input_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00019-of-00059.safetensors", + "model.layers.18.input_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00020-of-00059.safetensors", + "model.layers.19.input_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00003-of-00059.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00021-of-00059.safetensors", + "model.layers.20.input_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00022-of-00059.safetensors", + "model.layers.21.input_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00023-of-00059.safetensors", + "model.layers.22.input_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00025-of-00059.safetensors", + "model.layers.23.input_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00026-of-00059.safetensors", + "model.layers.24.input_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00027-of-00059.safetensors", + "model.layers.25.input_layernorm.weight": "model-00027-of-00059.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00028-of-00059.safetensors", + "model.layers.26.input_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00029-of-00059.safetensors", + "model.layers.27.input_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00030-of-00059.safetensors", + "model.layers.28.input_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00031-of-00059.safetensors", + "model.layers.29.input_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00004-of-00059.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00032-of-00059.safetensors", + "model.layers.30.input_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00033-of-00059.safetensors", + "model.layers.31.input_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00034-of-00059.safetensors", + "model.layers.32.input_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00035-of-00059.safetensors", + "model.layers.33.input_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00036-of-00059.safetensors", + "model.layers.34.input_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00037-of-00059.safetensors", + "model.layers.35.input_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00038-of-00059.safetensors", + "model.layers.36.input_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00039-of-00059.safetensors", + "model.layers.37.input_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00040-of-00059.safetensors", + "model.layers.38.input_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00041-of-00059.safetensors", + "model.layers.39.input_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00005-of-00059.safetensors", + "model.layers.4.input_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00042-of-00059.safetensors", + "model.layers.40.input_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00043-of-00059.safetensors", + "model.layers.41.input_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00044-of-00059.safetensors", + "model.layers.42.input_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00045-of-00059.safetensors", + "model.layers.43.input_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00046-of-00059.safetensors", + "model.layers.44.input_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00047-of-00059.safetensors", + "model.layers.45.input_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00048-of-00059.safetensors", + "model.layers.46.input_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00049-of-00059.safetensors", + "model.layers.47.input_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00050-of-00059.safetensors", + "model.layers.48.input_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00051-of-00059.safetensors", + "model.layers.49.input_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00059.safetensors", + "model.layers.5.input_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00053-of-00059.safetensors", + "model.layers.50.input_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00054-of-00059.safetensors", + "model.layers.51.input_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00055-of-00059.safetensors", + "model.layers.52.input_layernorm.weight": "model-00055-of-00059.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00056-of-00059.safetensors", + "model.layers.53.input_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00057-of-00059.safetensors", + "model.layers.54.input_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00058-of-00059.safetensors", + "model.layers.55.input_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00007-of-00059.safetensors", + "model.layers.6.input_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00008-of-00059.safetensors", + "model.layers.7.input_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00009-of-00059.safetensors", + "model.layers.8.input_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00059.safetensors", + "model.layers.9.input_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00059.safetensors", + "model.norm.weight": "model-00059-of-00059.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..14761dcf1466dc232bd41de9c21d4c617b15755e --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,24 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..fd92a7b4ef5fd2a5b85ed21c2458d3f79ce9ba90 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{{ messages[0]['content'].strip() }}{% else %}{% set loop_messages = messages %}{{ 'A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user\\'s questions.' }}{% endif %}{% for message in loop_messages %}{% if loop.index0 == 0 %}{% if message['role'] == 'system' or message['role'] == 'user' %}{{ ' USER: ' + message['content'].strip() }}{% else %}{{ ' ASSISTANT: ' + message['content'].strip() + eos_token }}{% endif %}{% else %}{% if message['role'] == 'system' or message['role'] == 'user' %}{{ '\nUSER: ' + message['content'].strip() }}{% else %}{{ ' ASSISTANT: ' + message['content'].strip() + eos_token }}{% endif %}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ ' ASSISTANT:' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": true +}