diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4ea65a2c64946862f32338e63558b5f902d33758
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+---
+library_name: transformers
+base_model:
+- Sao10K/MN-12B-Lyra-v1
+datasets:
+- jondurbin/gutenberg-dpo-v0.1
+license: apache-2.0
+---
+
+# mistral-nemo-gutenberg-12B-v4
+
+[Sao10K/MN-12B-Lyra-v1](https://huggingface.co/Sao10K/MN-12B-Lyra-v1) finetuned on [jondurbin/gutenberg-dpo-v0.1](https://huggingface.co/datasets/jondurbin/gutenberg-dpo-v0.1).
+
+### Method
+
+Finetuned using an A100 on Google Colab for 3 epochs.
+
+Reference: [Fine-tune Llama 3 with ORPO](https://mlabonne.github.io/blog/posts/2024-04-19_Fine_tune_Llama_3_with_ORPO.html)
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..65efe8ec155fb2b85e254b45610bbd3546f25d30
--- /dev/null
+++ b/config.json
@@ -0,0 +1,38 @@
+{
+ "_name_or_path": "Sao10K/MN-12B-Lyra-v1",
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 5120,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 1024000,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 40,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.44.2",
+ "use_cache": true,
+ "vocab_size": 131072,
+ "quantization_config": {
+ "quant_method": "exl2",
+ "version": "0.2.0",
+ "bits": 8.0,
+ "head_bits": 8,
+ "calibration": {
+ "rows": 115,
+ "length": 2048,
+ "dataset": "(default)"
+ }
+ }
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e79bd1a4c8a5c040a1c6df4c460f95e6acabfc8
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.44.2"
+}
diff --git a/measurement.json b/measurement.json
new file mode 100644
index 0000000000000000000000000000000000000000..37a0081ab1be92df27766cf36f5dc7aeea404e06
--- /dev/null
+++ b/measurement.json
@@ -0,0 +1,78047 @@
+{
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.8891013296026933,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9046755464453446,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9172681695536563,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9428868795696058,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9494476820293226,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9511421228709974,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635056476843984,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965098566130588,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686285853385925,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709661163781819,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761431154451872,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776831786883505,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979078366568214,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808324418569866,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883569620157543,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990346978369512,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905368191631216,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939948014522854,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974107761916361,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.912018242635225,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9190313753328825,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9135642992822748,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9139842045934576,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788062211714292,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847688612185026,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860969530908685,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991108731219643,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932811268066105,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925698873243833,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946844585632023,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969670864704409,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973029106071121,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981933514538565,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983497852165448,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987488202750683,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991876504531032,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.9770505145976418,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780386325560118,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982118525003132,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986174685390372,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873079845779821,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884039579253447,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989402921576249,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906052531380403,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919965706373516,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922395011312083,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993968127197341,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950073188857028,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943925326592044,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954741761872643,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966204793829667,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997636966603367,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970275634213498,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989481249726132,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992075720311779,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9788516352051183,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9793986474212847,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829599308340173,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841399130068327,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893241422741037,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901707815496545,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917785148871573,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944328932385695,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949549350299334,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945746460243275,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952481985092163,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972329920059756,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997631567481317,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984975619927833,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985910700143952,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989147370583132,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995932889317996,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9639236017277366,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9645607471466064,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741614272719935,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776709550305417,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794261424165023,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828897338164481,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813569611624667,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850506751160872,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891979764950903,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896439894249565,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910231107159665,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931382466303674,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916156083345413,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936806343887982,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947448388526314,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969483445349493,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950496668094083,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986329912943276,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987403883745796,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9627671116276791,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630447437888697,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574088924809506,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9577799596284565,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823380034220847,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888282331981157,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897159071345079,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850006479966013,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990828306267136,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920847055159117,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944192198546309,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941809436208323,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931142777204514,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944623810680289,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982010447665265,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983678144451819,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984951449656173,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9888793982957539,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896515576462996,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991479690921934,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935769321102845,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944591647700259,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945066516336641,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957966816268469,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958283822787436,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963092827483228,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964689247702297,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972614510671088,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974752868476667,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975284624256586,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977709103963877,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998601984330698,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988708243166146,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988607749538986,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999437858399592,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999702124488785,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9804559381384599,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810124447471217,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842568243804731,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852883658911052,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900930280748167,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908947019200576,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923368159093355,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948202622564215,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952973903794038,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949477989422647,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955781780575451,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974236362858823,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977976775875217,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985963457116955,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986859268851971,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989889776824337,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999627347888523,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9846252344156566,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985147918525495,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987892652812757,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904759338027552,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913714112419831,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923942245935139,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929233742387671,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941608933241743,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952084747584242,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952621714849221,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959858067725834,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964389757890451,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963463772284357,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967313134356549,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978335172330078,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984030417705837,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998124785721302,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992251652047822,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994973552560336,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9753363806950418,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760880391848715,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802918528255663,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815874946744818,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875081824628931,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885314357908148,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903550728371269,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993468343035171,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994062692319092,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936292116579256,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944328061844173,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967527503245756,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972236583891668,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982317093955843,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983388416861233,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987224702184138,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995232587680221,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.984828415669893,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854254001065305,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874196923092792,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905226371790233,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918936624338752,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922341045580412,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938768967986107,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942623204306552,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951908674679304,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954377259863051,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960527145548871,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964573344117716,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964982177081861,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968912295605007,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980856082157085,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984392969820061,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984989723092631,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991357571592456,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996007849511347,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9698348170832584,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707464011091935,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759094417095184,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977492663421129,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847094479360079,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859627924467388,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881997829989383,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919878754176592,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927276121942621,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922015863029581,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931857978042803,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960271445544142,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966027281786266,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978354430120242,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979745126084277,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984479753398582,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994254656332103,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9799654028917614,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807660611052262,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828447715232247,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873453261036622,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898306554869601,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900954036336196,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929530989182623,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932769623241926,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993595724435229,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941379251448732,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949160080991293,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952283038904792,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954760356953269,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957852057720485,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974584801024512,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978692864901141,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980883961053271,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987582102614013,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994916843838597,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9641733357780858,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9653176665306091,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9715242354493392,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733988328983909,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981934484682585,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834108964393014,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860636764451077,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905428988368887,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914019437212693,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907889993567216,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919511966015163,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953097920668753,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959874008046953,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974356709342254,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997604384430145,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981611310259292,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993194939666673,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9754221533474169,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764122147309152,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787315349829825,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843543234624361,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875391415859524,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878799797672975,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917902852359571,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922255112936622,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929417238423699,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929371625185013,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993830573401953,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994315088971665,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945873665182214,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950911575242093,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970163363767298,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975328486608831,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978621753030702,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986377750572405,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994278226635958,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.961105227470398,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9623129932503951,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968925604694768,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708975835850364,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804224497393558,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820124899086199,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848021378642634,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897949923025934,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990679355044114,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900062131254297,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912574103004054,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949006732357176,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956400417967847,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972240052333003,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973941804154923,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979869270403134,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992624778594625,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9737683158171805,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750757280148958,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778100503118414,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836637456166116,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863806147324411,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868430275666086,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906444933853651,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911891723933973,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923183557234312,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925133680042467,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931276126911766,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939316252344533,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942111243542872,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947335696534106,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967265580045549,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973123981372306,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976448326518661,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984792603277847,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993704956907191,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9585083472101312,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9597457898290533,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9667617710013139,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9688790503301119,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789869283374987,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980723040668588,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837082185243305,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890099465847015,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899821422602001,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892588838150627,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906138197371834,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945150915729372,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995311942539717,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970038219502098,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972021193488648,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997846950825892,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992077257484198,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9685185457530775,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.969665853600753,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725601296675833,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798050162039305,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834198512529072,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983682770478098,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888548003999811,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891762458964398,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907288135666596,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911850424189317,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915322284949454,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925050233539782,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926971040273967,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933808728268272,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958064383582065,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966912030389434,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968989407153506,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981696225310627,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991501955021369,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9579107573157862,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9592016496156391,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661374625406767,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682297047815824,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9783382055006529,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804436689928958,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833819285819405,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98846004668035,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896234822900671,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889227261668757,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990398067392801,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944288632587382,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952565855101535,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969261008265772,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971385717784104,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977780155053264,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991142581167974,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9685169677985341,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695447495109156,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9722959179627267,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800341725349426,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983910786478143,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841996117642051,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898978236474489,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902536986689818,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901543149822637,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906390183850339,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918599630656996,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924744877376055,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929345171702536,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933662673360423,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960385776664081,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966762924664899,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973105743135277,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980557809926962,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992549702721206,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.9561995926656222,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9575660605179637,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96490499534105,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.967078707720104,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777827419732746,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979684381108535,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827285675626052,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883323578458083,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893647518597151,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988605954929402,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900658256129214,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941606933349058,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950282365867966,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99678447744564,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970083668043739,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976965653661051,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991337427575337,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9679932374703257,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690274690326891,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9716821846209074,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785704816642561,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981986685803062,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825517974401775,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871396900791871,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876569815372166,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896898104956275,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908148693410974,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913601483169355,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922687348566557,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926175035928425,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930814857545652,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957488716432923,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965745277310673,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968882183495321,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979633934011585,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991724468571576,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9557207885541414,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9571119546890259,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642994152872186,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663796487607454,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775846804443159,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795831269339511,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982498005816811,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988276763966209,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893205636426022,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988477036356926,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899976120183342,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940713761668456,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949886943948897,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967372319415996,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969541732417909,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976078820855994,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991302666695494,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9689844846725464,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698838654317354,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726814564905668,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794613028827467,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836629660505998,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841187392410479,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885547710092444,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890841677000648,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905567796606767,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916701254091764,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991952300855988,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925007482892588,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928048033463327,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937865098840312,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960961886926701,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968393950870162,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971428491959446,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981014685411203,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991752584709933,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9561373434568706,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574500479196247,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642790273616189,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663134750566984,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777158592876635,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796713512194785,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825506304439745,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883122616692593,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893351432524229,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885767455163755,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900487982913068,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994130004393427,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995014379291158,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967614916202269,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969960102125218,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976255821162149,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991170018911362,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9654066154831334,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662891124424181,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689712994977048,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754727074974462,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807172103932029,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815375867642855,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855450532938305,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864389457200703,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888587460706109,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903374863298315,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910191830835844,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917457111571965,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918022861606196,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928948510634271,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954394913817707,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963635817954415,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966039234086087,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997730767648471,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990845256926197,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9540106685538041,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.955588299977152,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9625096697556346,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9645847144879793,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9768255368659371,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788477907055303,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818303726221386,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878702547989393,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888427320279574,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881000848192918,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896276514781149,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938663240326079,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947938150481174,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966022497729251,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968494618017423,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974914316676164,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990363195538521,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9645833059361106,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656317234039307,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682068322834215,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734091350906774,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820007430879694,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825069276910079,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986194234145315,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868836967568648,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857102161959598,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865743282594179,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907517205727728,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913572025926489,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914826484102952,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922112900959817,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995465261763648,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960304812381142,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964218022007691,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977415886364485,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990430099791602,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9523954516962955,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9539113609414351,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9612953788355777,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635417806474786,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758312404155731,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779075023375059,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810490561159033,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872462843593798,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883254562553606,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875318502124987,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891541733553535,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936003332075319,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945629243003694,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964424869731853,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967169351875782,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973959320862043,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990179274035128,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9558703742529217,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9571826740315086,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9592975754486888,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655017727299741,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802773783081457,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808384556519357,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853190105212363,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986091263984379,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860054505498785,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871614151879361,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896443572483564,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903529396182612,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904108298452277,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911749951149288,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941806389312995,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955422215555844,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950673372337693,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972426424685278,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988666558148045,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9511388320671885,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9527247767699392,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9606201429116099,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630659504940635,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9751465540183218,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772848737867255,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806604165779916,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868244168005491,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879847694384424,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872479211342963,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888804398084942,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934597054594442,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944356064263143,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963653354268325,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966606658540274,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973985776305199,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990251873080668,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9555749485367223,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.956750003915084,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.957253939227054,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9631512196440446,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797971044716082,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800868881376166,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862079871328253,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864829057141354,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872762647114302,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874935354057112,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896948698319887,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903007251651663,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905873231197658,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914315986005884,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946921930501336,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954521558002422,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958120502139393,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969852429471517,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988961251158464,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9484716057777405,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9501671414626272,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585445962454143,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9612243426473517,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737182714437184,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759667688294461,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979594150656148,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985976484261061,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987226087011789,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865109167600933,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882404859128752,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930868093904696,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941092498208347,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996132816923292,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964784203391326,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997282580325478,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989579322894937,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.9549134154068797,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563598350474709,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9599989966342324,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686117956512853,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772106393387443,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776494910842494,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847655688461504,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985298078311117,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858327100151464,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864167006392228,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883103056957847,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892495682365016,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898211085482648,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906267155157892,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994386057320394,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952343976811359,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958552235835477,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973457622292795,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988928414685162,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9438334828928897,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9457981272747642,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9550310937981856,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580646533715098,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713896920806483,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738791020292985,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778339266777039,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845926040097287,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860184600478724,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853400384124956,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872366456609023,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924909048958829,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936114435917452,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957507691885296,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961883006127257,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970734962507298,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988547725308883,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9559294048108553,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.957930536646592,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9620990878657291,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9706907429193196,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766521249946795,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775293030236897,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831922148403368,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842835602007414,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867937439366391,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988002136349678,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880818669733248,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897160412449586,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892830331074564,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910039980160562,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938113014948996,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955720501510721,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949812022478957,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974538227053065,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986690856320294,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9416577502300865,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9436888882988378,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9533512811911733,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95653509152563,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703552660189176,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9729116339432565,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9770912151587637,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840903658615915,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855375964390604,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848158986944902,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867749261228662,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922283213389548,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933797262216869,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956083697708029,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960402273818066,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969547463482932,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987809396299877,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9561093512334322,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9572704716732627,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9612601619017751,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691363824041266,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761646295848646,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774238373103895,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817052593356684,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832779074970045,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870239519759229,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987200412311052,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879458915246161,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900479716689963,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891268140391299,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912436729983279,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933993259542867,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954972353420759,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944254575591338,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974085929754534,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985089815760914,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.9414692677949604,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434707854923449,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9530224423659475,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9561696648597717,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.970307961890572,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9728640286546004,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769965551401439,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841110627902182,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855254449342427,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847932254013262,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867778059683348,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922236862935518,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933888276940898,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956036304172716,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960526098546229,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969345523338569,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988016418524479,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9606074471222728,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.961264779693202,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9657620536653619,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726151761255766,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787210241744393,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799080836145502,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837260528614646,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850529244071559,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877202667688069,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882590190360421,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900746314149154,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99105004966259,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908346714157807,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919358648751911,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943309916477454,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959760629817059,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952676574650564,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997723855862492,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988069481363422,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9413384010917262,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9432244677292674,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.952817584338941,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9559963753348902,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9702967154352289,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727509460951153,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769560051591772,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841651445940921,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855899904903612,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848397358467704,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867541633154217,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922594749613812,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933813378999108,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956754814637335,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960656468021242,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969419705632486,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988132803455779,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9637410828941747,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648343010952598,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968260363528603,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975704982092506,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811461756103917,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818531086570338,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862129092216492,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871376407773871,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891879048786665,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901148096511239,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908343820195449,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918414283739893,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916176090114995,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929687953309009,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953275997387735,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963333002830806,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963793985937771,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997806051255841,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990211491914172,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.939667356641669,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9415376374596044,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9512078824796175,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9544269630783483,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9693661865435148,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718836106752095,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761558222143274,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983725053699393,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852007376520258,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843674584438926,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863483058778864,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920291924162915,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931810247270685,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955713827359048,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959505274891853,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968447693084416,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987868167656032,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9607256556812086,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9619269559257909,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663728569683275,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726981928474024,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786708339264518,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795487272111993,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832386829351124,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842545515612552,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880925762025934,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885581245547846,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990002987416167,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910854093338314,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909318450250124,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921770511489165,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945622792369441,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960551508947423,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995392088827334,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977890370707763,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987082765682748,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9363987822281687,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.938377254887631,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9484745797358061,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9518385215809471,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9675943192682768,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703324123432762,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9748188100363079,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827372337642469,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843182422612843,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833915123814031,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855571561738065,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914896401919817,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927776055900674,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952393327104417,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956560393697337,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965843640659985,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986764308261243,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9660938413519609,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9675319728098417,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9704899380081579,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769313335418701,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816532135009766,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832936415546819,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986379021092465,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885177910327911,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897706845873281,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904527170093436,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912732157268023,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992170061720045,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923178639851118,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930738351846996,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952746810097444,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996455936839706,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962000537075495,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980003861220259,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989570422392142,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9338111250024093,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9358968734741211,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9462488889694214,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9496818498561257,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966222069765392,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691112512036374,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737214822518198,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819619749721727,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836302085926658,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827076933885875,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849619332112765,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991136726580168,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924737066030502,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950150616074863,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954815495171045,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964409254883465,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986199033691695,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9607684047598588,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621239806476393,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663867730843394,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714832494133397,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778429630555605,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792020979680514,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823649541327828,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839632636622379,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885444170550296,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988722067914511,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906201480250609,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914467373960897,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914156080860841,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925349652767181,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947677744846595,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960122720191353,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956728737605246,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976655917340204,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989475778451091,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.9344079180767662,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9363654224496138,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9464244466078908,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949677941046263,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9666333888706408,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9693266435673362,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738119699453053,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823580603850516,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839419157881486,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829967178796467,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851176817166177,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913444832751626,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925757913213027,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952128447200123,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956028873198911,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965165018251068,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986981473078853,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9637252343328375,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9681294089869449,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720067915163542,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780595569234145,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818926375163229,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983789291821028,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857705596246218,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883635271536676,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892580219005284,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903634527796193,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919248050764987,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925182744076377,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928378345150697,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993434622099525,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956962799555377,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966051170700475,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965035299721517,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981084914976045,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990557916462421,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9348327172429938,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9367191917017886,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.946221113204956,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9492685983055517,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668095362813849,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695858798528972,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738628927030062,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824856519699097,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840958338034781,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831020471296812,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852162332911241,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913900729856993,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926306377900275,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952453198401552,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995626064115449,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964829377437893,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986890859313702,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.9714759996062831,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725595022502699,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750622228572243,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802378118038177,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839129416566146,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852010858686346,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879454852719056,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989517576600376,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990580940717145,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910664511354346,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924246402163255,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930665791034698,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932031694211458,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938661609041063,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959201918620812,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969658649673587,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966594325868707,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998376131645943,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990869438961932,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.9350208169535587,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9368643384230764,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9461624747828433,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9491328126505801,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9668537378311157,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695217327067727,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9736829180466501,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825943410396576,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841369641454596,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831159303062841,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852140985037151,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914109902946573,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926289304306632,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952884739951083,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956331715772027,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964577053722582,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987205392435977,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9719412954230058,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9736335308928239,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766586391549361,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807092516045821,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984172301857095,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857658634060308,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878680172719454,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900504766326201,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910781673694912,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915228376263067,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925665902464014,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932277061437306,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936478930084329,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944399293316039,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962719918081635,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971956577348081,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996954981041582,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985512560723644,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991178203766283,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.93385910987854,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9357272261067441,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9448371749175222,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477492853214866,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662193593225981,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689557834675437,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730430214028609,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822966644638463,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983871047434054,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827922488513746,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849401806530199,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912383870074624,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924882524891904,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951978192517632,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955399840285903,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963431609304327,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986871746613791,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9652680880145023,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9673756329636825,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9709104770108273,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764865103520846,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812326494016146,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828852556253734,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853570225991701,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875031937109796,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891682706381145,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896239881452761,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912248520474685,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991856124056013,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992025624764593,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925917222311622,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953409527477465,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964477788460883,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996201994387727,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980709372382415,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998939411910741,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9320079778370104,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9339008268557096,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9431166397897821,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9460634620566117,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9652457990144429,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9680674452530711,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972196240174143,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981738983016265,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834052183126148,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823009575668135,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845096029733357,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910013087485966,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992275280387778,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995060599163959,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954262787574216,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962421534092802,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986511737500366,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9686387306765506,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9704228639602661,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735945808260065,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786536740629297,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830326281095806,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844461287322798,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869695682274667,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98865828074907,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902413291366476,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904492262162661,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920749099631059,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928652071639111,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993022233247757,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937209855569037,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99576136469841,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968380445712491,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99642348171849,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980615626432394,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990054697386528,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9322487617793837,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9341570954573781,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9430877911417108,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.94595383970361,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9653245963548359,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9681785702705383,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721676958234686,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818380412302519,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834828094432229,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823390904225802,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845634306731977,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910115858441905,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923024781440434,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995087983576875,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99540931025618,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961875551625302,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986548473764407,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.9672465481256184,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734869442488018,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777420335694363,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812731836971483,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850931685221823,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859906152675026,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879635674388785,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894536192479887,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908115393237064,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907970130443573,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928352236747742,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936370943721972,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937860467716267,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943977853185252,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963830678086532,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971408577341783,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969189941490951,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983937318779921,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991366227990702,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9311226731852481,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9330688777722811,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9418463393261558,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9446893114792674,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647976285532901,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9676653209485506,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9715805743869982,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815089326155814,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831483489588687,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820731721426311,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843146722567709,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908856820119055,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921822187147642,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994994230568409,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953602097536388,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961322510713025,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986150749027729,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9672630178300958,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9686856897253739,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9719158191429941,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774190752129805,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981899757134287,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836119930995139,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862167443099775,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883776660028257,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894652099985826,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901125446746224,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916301981398934,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923459903190011,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925502373983985,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932773152464315,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955234072710338,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965548127105361,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963531109847521,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980902646325136,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990329550285089,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9300580338427895,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9320240146235416,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9408736354426334,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9438350200653076,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642015287750646,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671121961192081,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.971108897736198,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811352083557531,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982832604332974,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817533932234112,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840378761291504,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907146841287613,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920420505498585,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994890710633052,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952684715390205,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99606142193079,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985904811244262,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9713034849417838,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9724274277687073,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763069874361942,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802831740755784,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839784314757899,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856085730226416,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875450338187971,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989616774414715,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909821631092774,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913692427308936,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924966739980798,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933355231034128,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934462383389473,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943566047831586,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959307426684781,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971289244529448,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964740174381357,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984022846543475,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989775311397878,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.9290354251861572,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9310858814339888,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9398796370154933,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9428846522381431,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9636627341571607,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9666297780840021,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.970631436297768,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807651262534293,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825126748335989,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814871769202383,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838033506744787,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905784192838167,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919211872314152,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947914818399831,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952012947515437,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960005079445086,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985461396802413,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.965212134938491,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9664586688342848,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713910692616513,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9770593172625491,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982042593391318,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827434514698229,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874151851001539,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881809534210908,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895271392245042,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897681945248654,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912002902281912,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918816873901769,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922522839746977,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929815491563395,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957097894267032,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964776497922445,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966378035513979,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980394367716814,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99908956071656,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9257800390845851,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9280076152399966,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9373946315363834,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9405572163431268,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9620497007119029,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651854979364496,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694157838821411,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799247086048126,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817130000967729,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806740785899916,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831020769320036,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901778031336633,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915729698381925,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945622062996814,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950031473448402,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958452315706956,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984902988531088,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9528645496619375,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9595024773949071,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9632406015145152,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9698647197924162,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774173890289507,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786606603547147,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826990930657638,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841575842154654,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852249261580015,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860795689256567,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988693588658383,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892414549463674,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887878526198236,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897873080090472,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940945788433677,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995586701521748,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950624880822081,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974328663788343,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986484566409337,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9261151489458586,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9283229928267629,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.937567390893635,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9407319959841276,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621833813817877,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.965301789735493,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695051563413519,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799620838541734,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817650569112677,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807082696964866,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831346728299794,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901699110081321,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991588358032076,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945601977800068,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949899731498015,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958389632795986,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984891503853234,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9637649843567296,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678694637198197,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9723027850452223,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775420866514507,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822943492939598,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828678745972482,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865706437512448,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873099648638776,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890254477137014,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895548051909396,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913361182338313,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920738394323149,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922327626692621,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930461739238939,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995366567059567,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964019942440485,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961361896834875,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978972965557324,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989390161476637,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9236757002378765,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9260791979337993,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9354130155161807,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9387200694335134,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9609484107870805,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641896172573692,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684486671497947,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791461072470012,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810396574045482,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980084838051545,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825875978720816,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898584022333747,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913142276437659,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943278745601052,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994841305440978,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957092741602346,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984073261485288,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.956012487411499,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9580446042512593,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9631805451292741,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9699020448483919,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774747977131292,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779773354530334,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832924259336371,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839070956957968,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863803323946501,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865788833091134,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887469661863226,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990000550684176,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898044968906202,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912598125244442,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939851266773123,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99514959438851,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949467797812662,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967136245809103,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998618967438999,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9195330080233122,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9220343953684756,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9320933191399825,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.935702173333419,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9586760589950963,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621404409408569,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9667895153949135,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977887467334145,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799073354194039,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788575956695958,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98153450300819,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891955977992007,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907708364097696,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939752079938587,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944876840240077,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954740424689493,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983161558446131,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9457822975359464,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9494810072999251,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9558575247463427,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661487811490109,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734095931053162,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738568694967973,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829165621807701,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834037862325969,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984930578031038,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859291973866915,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866788591209211,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876761820755506,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886428007954046,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98967665590738,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937278196999901,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947676109640222,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954474372299094,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997130285752447,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987709333041781,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9116299152374268,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9149068154786762,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9258868945272345,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9296751775239643,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9544329329540855,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9586459366898787,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9637097433993691,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756965182329479,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778229060925936,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766362058488947,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797434006866655,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880022367364482,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898504073682585,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932047850207278,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938410226451723,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948929305139341,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980830842334973,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9416424286992926,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563987192354704,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9615881411652816,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687537048992357,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758568500217638,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766973115895924,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824763395284352,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834360888129786,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849492687928049,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858368431266985,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877610151704989,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888336383982709,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891553740752371,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903402265749479,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938771528633017,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950538179592082,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950365000649503,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99730155773853,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986626674470148,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.8969918740423102,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9015716000607139,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9124114764364142,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9162065167176097,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9474311847435801,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9529120890717757,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9578034407214114,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9711744502971047,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737772753364161,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726209295423407,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976600402279904,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856774352098766,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880886987635964,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912079886386269,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928011157010731,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938701124567735,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975897689398966,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.952824216139944,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554673997979415,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9625231121715746,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9719581823599966,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978102268357026,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978615790605545,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844670264344466,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850967871515375,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858021877313915,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866087122967369,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886866823623055,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896926519117857,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903634237615686,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909623484862479,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944304765055054,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995237123025091,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955461327182619,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972703123563215,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985830446607188,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.8741740803969533,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8797431494060316,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8903206398612574,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8952234983444214,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9357856072877583,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9418359743921381,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9465133641895495,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9646393970439309,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678246473011217,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9670271151944211,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714797421505577,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829796489916349,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855671054438541,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898632921670613,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912174767569492,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924364654641402,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967414707337555,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "last_module_idx": 82
+}
\ No newline at end of file
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..79fe50f48e72f95e85b1059c699a10762feae6f9
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,370 @@
+{
+ "metadata": {
+ "total_size": 24495564800
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00005-of-00005.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00005.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00005.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00005.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00005.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00005.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00005.safetensors",
+ "model.norm.weight": "model-00005-of-00005.safetensors"
+ }
+}
diff --git a/output-00001-of-00002.safetensors b/output-00001-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89a6b5cfea2ec7e3145a39596ada83ca89bfe7f0
--- /dev/null
+++ b/output-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41f9003cce06ca69eb166a9dde8eac1067bfc04de04c1b8d647a2c0a6b559d79
+size 8588624916
diff --git a/output-00002-of-00002.safetensors b/output-00002-of-00002.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..87623d29cc78771b8c36fa5c785654be507403f7
--- /dev/null
+++ b/output-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:965332cf5af8204f7754312e2eb059f5b202c9232e5e0fca84f55d4780d237e6
+size 3046243588
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..ec5acd742ca3df11b3b7933152376b737565842d
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,409625 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 3,
+ "content": "[INST]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 4,
+ "content": "[/INST]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 5,
+ "content": "[AVAILABLE_TOOLS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 6,
+ "content": "[/AVAILABLE_TOOLS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 7,
+ "content": "[TOOL_RESULTS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 8,
+ "content": "[/TOOL_RESULTS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 9,
+ "content": "[TOOL_CALLS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 10,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 11,
+ "content": "[PREFIX]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 12,
+ "content": "[MIDDLE]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 13,
+ "content": "[SUFFIX]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 14,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 15,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 16,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 17,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 18,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 19,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 20,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 21,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 22,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 23,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 24,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 25,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 26,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 27,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 28,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 29,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 30,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 31,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 32,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 33,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 34,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 35,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 36,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 37,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 38,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 39,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 40,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 41,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 42,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 43,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 44,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 45,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 46,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 47,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 48,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 49,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 50,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 51,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 52,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 53,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 54,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 55,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 56,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 57,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 58,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 59,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 60,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 61,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 62,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 63,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 64,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 65,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 66,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 67,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 68,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 69,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 70,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 71,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 72,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 73,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 74,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 75,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 76,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 77,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 78,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 79,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 80,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 81,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 82,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 83,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 84,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 85,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 86,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 87,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 88,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 89,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 90,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 91,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 92,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 93,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 94,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 95,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 96,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 97,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 98,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 99,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 100,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 101,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 103,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 104,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 105,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 106,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 107,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 108,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 109,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 110,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 111,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 112,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 113,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 114,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 115,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 116,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 117,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 118,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 119,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 120,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 121,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 122,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 123,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 124,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 125,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 126,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 127,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 128,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 129,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 130,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 131,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 132,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 133,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 134,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 135,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 136,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 137,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 138,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 139,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 140,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 141,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 142,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 143,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 144,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 145,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 146,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 147,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 148,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 149,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 150,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 151,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 152,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 153,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 154,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 155,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 156,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 157,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 158,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 159,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 160,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 161,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 162,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 163,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 164,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 165,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 166,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 167,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 168,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 169,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 170,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 171,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 172,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 173,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 174,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 175,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 176,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 177,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 178,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 179,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 180,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 181,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 182,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 183,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 184,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 185,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 186,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 187,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 188,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 189,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 190,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 191,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 192,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 193,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 194,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 195,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 196,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 197,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 198,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 199,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 200,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 201,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 202,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 203,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 204,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 205,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 206,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 207,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 208,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 209,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 210,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 211,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 212,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 213,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 214,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 215,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 216,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 217,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 218,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 219,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 220,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 221,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 222,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 223,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 224,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 225,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 226,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 227,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 228,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 229,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 230,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 231,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 232,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 233,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 234,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 235,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 236,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 237,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 238,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 239,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 240,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 241,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 242,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 243,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 244,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 245,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 246,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 247,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 248,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 249,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 250,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 251,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 252,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 253,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 254,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 255,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 256,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 257,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 258,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 259,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 260,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 261,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 262,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 263,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 264,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 265,
+ "content": "