diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..f46df26374236435f846b3156fe9a97837e38f91 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+bagel.png filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1d36ae2a10afc25fbd5cbc4375169af14f24f428
--- /dev/null
+++ b/README.md
@@ -0,0 +1,50 @@
+---
+license: other
+license_name: yi-license
+license_link: https://huggingface.co/01-ai/Yi-34B-200K/blob/main/LICENSE
+datasets:
+- ai2_arc
+- unalignment/spicy-3.1
+- codeparrot/apps
+- facebook/belebele
+- boolq
+- jondurbin/cinematika-v0.1
+- drop
+- lmsys/lmsys-chat-1m
+- TIGER-Lab/MathInstruct
+- cais/mmlu
+- Muennighoff/natural-instructions
+- openbookqa
+- piqa
+- Vezora/Tested-22k-Python-Alpaca
+- cakiki/rosetta-code
+- Open-Orca/SlimOrca
+- spider
+- squad_v2
+- migtissera/Synthia-v1.3
+- datasets/winogrande
+- nvidia/HelpSteer
+- Intel/orca_dpo_pairs
+- unalignment/toxic-dpo-v0.1
+- jondurbin/truthy-dpo-v0.1
+- allenai/ultrafeedback_binarized_cleaned
+- Squish42/bluemoon-fandom-1-1-rp-cleaned
+- LDJnr/Capybara
+- JULIELab/EmoBank
+- kingbri/PIPPA-shareGPT
+---
+EXL2 quant (4.65 bpw, 6-bit head) of [jondurbin's](https://huggingface.co/jondurbin) [bagel-dpo-34b-v0.2](https://huggingface.co/jondurbin/bagel-dpo-34b-v0.2).
+
+Fits into 24 GB of VRAM with 16k context on Windows.
+
+```bash
+python3 convert.py \
+ -i /input/jondurbin_bagel-dpo-34b-v0.2/ \
+ -c /input/pippa_cleaned/0000.parquet \
+ -o /output/temp/ \
+ -cf /output/bagel-dpo-34b-v0.2-4.65bpw-h6-exl2/ \
+ -l 8192 \
+ -ml 8192 \
+ -b 4.65 \
+ -hb 6
+```
\ No newline at end of file
diff --git a/bagel.png b/bagel.png
new file mode 100644
index 0000000000000000000000000000000000000000..3a59d2b5b091a8de83da5e48ffea41046af4e5b7
--- /dev/null
+++ b/bagel.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d922a78a6f7d2de37f094d9eef558fd87dfc8e8df293c195aae27cb402b4160
+size 2152496
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e5da9315ac318ecf7770bc6d524fa784371a7fe0
--- /dev/null
+++ b/config.json
@@ -0,0 +1,29 @@
+{
+ "_name_or_path": "bagel-final-34b-v0.2",
+ "architectures": [
+ "LlamaForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 7168,
+ "initializer_range": 0.02,
+ "intermediate_size": 20480,
+ "max_position_embeddings": 200000,
+ "model_type": "llama",
+ "num_attention_heads": 56,
+ "num_hidden_layers": 60,
+ "num_key_value_heads": 8,
+ "pad_token_id": 0,
+ "pretraining_tp": 1,
+ "rms_norm_eps": 1e-05,
+ "rope_scaling": null,
+ "rope_theta": 5000000.0,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.36.2",
+ "use_cache": false,
+ "vocab_size": 64000
+}
\ No newline at end of file
diff --git a/measurement_pippa.json b/measurement_pippa.json
new file mode 100644
index 0000000000000000000000000000000000000000..20582e547b63b7a2cb384d90c1b80f813b2abeb9
--- /dev/null
+++ b/measurement_pippa.json
@@ -0,0 +1,117067 @@
+{
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.9916511178016663,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921920299530029,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961221218109131,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965435266494751,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972853660583496,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975286722183228,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976813197135925,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978165626525879,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971957206726074,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972090125083923,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986101388931274,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984003901481628,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986003041267395,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984079599380493,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989661574363708,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988599419593811,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989889860153198,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989380240440369,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999148428440094,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9970613718032837,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971551895141602,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99762362241745,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977359175682068,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988901019096375,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989292621612549,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991437196731567,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994170069694519,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994422793388367,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994399547576904,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994595646858215,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996733665466309,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996249079704285,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997578859329224,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997711181640625,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997933506965637,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9998049139976501,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.9966627359390259,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996977686882019,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975985288619995,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976401925086975,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986175298690796,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985560774803162,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986651539802551,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986029863357544,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999005913734436,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990100264549255,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999273955821991,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993183612823486,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992749094963074,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993254542350769,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993851184844971,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993656277656555,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993861317634583,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993967413902283,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994475245475769,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9958555102348328,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959710836410522,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966340661048889,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968754649162292,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982879757881165,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983662366867065,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986840486526489,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990355968475342,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990940093994141,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991186857223511,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991622567176819,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994717240333557,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994187355041504,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995893239974976,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999641478061676,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996737837791443,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996898770332336,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9939479827880859,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947527050971985,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960957169532776,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970780611038208,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975697994232178,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976674914360046,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981937408447266,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982945919036865,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99843829870224,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984568953514099,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987914562225342,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988304376602173,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989317655563354,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989108443260193,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992778301239014,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991697072982788,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993183016777039,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992190003395081,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994603395462036,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9810635447502136,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831270575523376,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97730952501297,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773611426353455,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964408278465271,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961507320404053,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963015913963318,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974517226219177,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972068071365356,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975550770759583,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997525155544281,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982824921607971,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997968316078186,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980937838554382,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981951117515564,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982119202613831,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982720613479614,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9850714206695557,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851982593536377,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906792044639587,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908775687217712,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909778237342834,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991298258304596,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909505844116211,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912198185920715,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922248125076294,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922143220901489,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954545497894287,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969878792762756,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954708218574524,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970097541809082,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970909953117371,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998207688331604,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971085786819458,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993825554847717,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992520809173584,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9962160587310791,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962687492370605,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972644448280334,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975840449333191,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998160719871521,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983025193214417,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987209439277649,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990302324295044,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991318583488464,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990527629852295,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991665482521057,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995005130767822,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995231032371521,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996774792671204,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997031092643738,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997596740722656,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997941255569458,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9789657592773438,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795272946357727,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871828556060791,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877371191978455,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880064129829407,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893637299537659,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881277084350586,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894390106201172,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925371408462524,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926104545593262,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940610527992249,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959754347801208,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941068887710571,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960014224052429,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962995648384094,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981227517127991,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963103532791138,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992612600326538,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989907145500183,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.9950876235961914,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951696991920471,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962056875228882,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965366125106812,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975985884666443,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977855682373047,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982243776321411,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987429976463318,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988718628883362,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998769998550415,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989224076271057,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993628263473511,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993991851806641,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996016025543213,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996347427368164,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999699056148529,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9997648000717163,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.985052227973938,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853646159172058,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890764951705933,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899033904075623,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899194240570068,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914783239364624,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900334477424622,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916262030601501,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930728673934937,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931014776229858,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946414232254028,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961720108985901,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994666337966919,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961966872215271,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967676401138306,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982900619506836,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967824816703796,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994010329246521,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991424679756165,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9936901330947876,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993807315826416,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950383305549622,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954373240470886,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969395399093628,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971811175346375,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977120161056519,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983936548233032,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985543489456177,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984344244003296,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986230134963989,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991835355758667,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992206692695618,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999479353427887,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995097517967224,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995821714401245,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996575713157654,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.980290949344635,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980672299861908,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857917428016663,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870554208755493,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873853325843811,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895307421684265,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877086877822876,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898728728294373,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919826984405518,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919273257255554,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944682121276855,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960171580314636,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945071935653687,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961146712303162,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966794848442078,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978813529014587,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967237710952759,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986943006515503,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99873948097229,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9914459586143494,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915987253189087,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933344125747681,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939113855361938,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99575275182724,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960957765579224,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968522191047668,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977606534957886,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997985303401947,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978219270706177,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980926513671875,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988642930984497,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989274740219116,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992731213569641,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999305009841919,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994059205055237,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995073676109314,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9812463521957397,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814854264259338,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857867956161499,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873936772346497,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987537145614624,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895042181015015,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880678653717041,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900630116462708,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926815032958984,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927049875259399,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938641786575317,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961642026901245,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993995189666748,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962625503540039,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958221316337585,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980148077011108,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958886504173279,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999016284942627,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988479614257812,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.9888496398925781,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890583157539368,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912421703338623,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991955041885376,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944193363189697,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948745369911194,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958158135414124,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970859289169312,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973926544189453,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971431493759155,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975166320800781,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985433220863342,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986504912376404,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991077184677124,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991695284843445,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993101358413696,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994832277297974,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9778448343276978,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779653549194336,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850366711616516,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860543608665466,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861450791358948,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880096316337585,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864541292190552,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882301092147827,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921082258224487,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921477437019348,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925978183746338,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956312775611877,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926044344902039,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956944584846497,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952021241188049,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978322982788086,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995220422744751,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991552829742432,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987678527832031,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9871007204055786,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873687624931335,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897124767303467,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904783368110657,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993538498878479,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994062602519989,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950747489929199,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966219067573547,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996973991394043,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966927766799927,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997123658657074,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983145594596863,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998435914516449,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989666938781738,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990367293357849,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991884231567383,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993996024131775,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9754169583320618,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754309058189392,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835441708564758,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984424889087677,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852610230445862,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987445592880249,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985526442527771,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877253174781799,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914652705192566,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914928078651428,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992660403251648,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951600432395935,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927177429199219,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950599670410156,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950554966926575,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971758723258972,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950965046882629,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987868666648865,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986691474914551,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9849107265472412,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852250814437866,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879682064056396,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888790249824524,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924201965332031,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930415749549866,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942330121994019,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996029794216156,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964540004730225,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996124267578125,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966376423835754,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980324506759644,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981871843338013,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988002181053162,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988774657249451,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990585446357727,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993076324462891,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.9717433452606201,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718511700630188,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806590676307678,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826663732528687,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832072854042053,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863473176956177,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837782382965088,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869009852409363,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990390956401825,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904893636703491,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910234212875366,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937229156494141,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911441206932068,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938380122184753,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99455726146698,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974084496498108,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946025013923645,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985674023628235,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982517957687378,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.9841001629829407,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844397902488708,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98731529712677,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988273024559021,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920425415039062,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992662250995636,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939187169075012,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957864284515381,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962202310562134,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959070682525635,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996414840221405,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978866577148438,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979974031448364,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986558556556702,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987788796424866,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989616870880127,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992059469223022,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.9697645902633667,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705995917320251,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804325103759766,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826233386993408,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835049510002136,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846320748329163,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842091202735901,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853500723838806,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877658486366272,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877700209617615,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915499091148376,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993980348110199,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917001128196716,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941314458847046,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994657039642334,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969836473464966,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947617650032043,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984461069107056,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983070492744446,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9814408421516418,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818342924118042,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851564168930054,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862549901008606,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906224608421326,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913830757141113,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928271174430847,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950851798057556,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955996870994568,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951958656311035,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958256483078003,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975569248199463,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99774169921875,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985038042068481,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985867142677307,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998802125453949,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991092681884766,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9729958176612854,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9732123613357544,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798449277877808,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808191657066345,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816834926605225,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846607446670532,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822173714637756,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852349162101746,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906075596809387,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908429980278015,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919682741165161,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949333071708679,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920682311058044,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950897693634033,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948654770851135,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974560737609863,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949308037757874,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986845850944519,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983776211738586,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9798234701156616,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802812337875366,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837642312049866,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848872423171997,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898244142532349,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906537532806396,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921519756317139,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947081804275513,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952502846717834,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948007464408875,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954928755760193,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973839521408081,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976043105125427,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984415173530579,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985295534133911,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987636208534241,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991443753242493,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9688483476638794,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9693475961685181,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806172251701355,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817233085632324,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822681546211243,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864149689674377,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828275442123413,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869187474250793,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921303987503052,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921066164970398,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925168752670288,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948770403862,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925761222839355,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950173497200012,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944653511047363,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970582127571106,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945113062858582,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998593807220459,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982120990753174,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.978751540184021,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792397618293762,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827682375907898,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838986992835999,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892674684524536,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901453256607056,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916596412658691,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944195747375488,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949862957000732,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945099949836731,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952414631843567,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972326755523682,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997459888458252,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983398914337158,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984258413314819,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986588358879089,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990577101707458,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9642239212989807,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643794894218445,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9770732522010803,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785412549972534,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788427352905273,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811563491821289,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9793720245361328,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981660783290863,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876208305358887,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876294732093811,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895809888839722,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931020140647888,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896796941757202,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932088255882263,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930890202522278,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963339567184448,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993137776851654,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983342885971069,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978730082511902,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9773637652397156,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778916239738464,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815497994422913,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827172756195068,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885438084602356,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894993305206299,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910562634468079,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940300583839417,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946380853652954,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941282868385315,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949139356613159,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970252513885498,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972600340843201,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981905817985535,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982877969741821,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985237717628479,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989416599273682,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9553554654121399,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9548805952072144,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9753460884094238,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765229821205139,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762260913848877,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794961810112,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976487934589386,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797762036323547,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875655174255371,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877501726150513,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884263277053833,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928598999977112,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885321259498596,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931265115737915,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927502870559692,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964757561683655,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927933216094971,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984723925590515,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976894855499268,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9765742421150208,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771353006362915,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808671474456787,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820517897605896,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881805777549744,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891487956047058,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907219409942627,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938113689422607,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944289326667786,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939252138137817,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947178959846497,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968975186347961,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971106648445129,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980758428573608,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981984496116638,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984318017959595,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988492131233215,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9640137553215027,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643221497535706,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759628176689148,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776574969291687,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775779843330383,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800313711166382,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777981042861938,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804221391677856,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986173689365387,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869149923324585,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891378879547119,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927849769592285,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893239140510559,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927408695220947,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929056763648987,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960634112358093,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929739236831665,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979949593544006,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979762434959412,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9769737124443054,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775112867355347,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981047511100769,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821756482124329,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883458018302917,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893138408660889,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990817129611969,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939297437667847,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994537889957428,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940332770347595,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948204159736633,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969633221626282,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972037076950073,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998138964176178,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982488751411438,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984735250473022,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989057779312134,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.9629348516464233,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96349036693573,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9767098426818848,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786889553070068,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792500138282776,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982118546962738,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801573753356934,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830900430679321,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868433475494385,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869551062583923,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895917177200317,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933220148086548,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897608160972595,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935438632965088,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934016466140747,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964819550514221,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993502676486969,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978945851325989,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979075193405151,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9748419523239136,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754326343536377,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9793248176574707,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805814623832703,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872764945030212,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883196949958801,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989975094795227,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933458566665649,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940057396888733,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934662580490112,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943242073059082,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966702461242676,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969120621681213,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979459643363953,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998068630695343,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983142018318176,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987741708755493,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.9628459215164185,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9634732007980347,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749271273612976,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760324954986572,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979577898979187,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819086790084839,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801782369613647,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825519323348999,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880121350288391,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883525371551514,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886246919631958,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926262497901917,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887056946754456,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927281141281128,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931390285491943,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960150718688965,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932054877281189,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977235794067383,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975818991661072,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9738926887512207,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9745210409164429,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978499174118042,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797469973564148,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868363738059998,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879039525985718,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895743131637573,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931322336196899,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937925338745117,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932336807250977,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941176176071167,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965413212776184,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967857599258423,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978538155555725,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980253577232361,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982752799987793,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987705945968628,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9629545211791992,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9631220102310181,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778788685798645,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797654747962952,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801275134086609,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828494787216187,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809409379959106,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836323261260986,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900205731391907,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900782108306885,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904950261116028,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934985637664795,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907618165016174,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937801361083984,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943070411682129,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967566728591919,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994412362575531,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980895519256592,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979689121246338,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.9716742634773254,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9724240899085999,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9768146872520447,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781484603881836,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858585596084595,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870020747184753,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887670874595642,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926298260688782,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993309736251831,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927278161048889,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936689734458923,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962713718414307,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965094923973083,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976646900177002,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978150129318237,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980709552764893,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985783100128174,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.977441668510437,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779864549636841,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821662306785583,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850450158119202,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987686276435852,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884766340255737,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895305037498474,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904910922050476,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924757480621338,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926350712776184,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940571188926697,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952369332313538,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944952726364136,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956393837928772,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969350099563599,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975558519363403,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972670674324036,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985073804855347,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989872574806213,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9580497741699219,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9600925445556641,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9639496803283691,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964934766292572,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856927990913391,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865694642066956,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881517887115479,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921419620513916,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925327897071838,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925720691680908,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930718541145325,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960758686065674,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960530400276184,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972312450408936,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975894689559937,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997810959815979,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981446266174316,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9805434942245483,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807909727096558,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985090434551239,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876002669334412,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892609119415283,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891257882118225,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941625595092773,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940098524093628,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994929313659668,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951342344284058,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949448704719543,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953770637512207,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962127208709717,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996604323387146,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978021383285522,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979735612869263,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981672763824463,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986333250999451,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989866018295288,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9719057679176331,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972592294216156,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.977016806602478,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784000515937805,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98581862449646,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869733452796936,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988727867603302,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923542141914368,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930342435836792,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992723822593689,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936558604240417,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962343573570251,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964867234230042,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975631237030029,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977060556411743,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979504346847534,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983925819396973,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9767357110977173,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786844253540039,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826850295066833,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857801795005798,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880805015563965,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888644218444824,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923199415206909,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926518797874451,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937987923622131,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940025806427002,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942276477813721,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951319098472595,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952743053436279,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958620667457581,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966883659362793,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971606135368347,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972162842750549,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976410269737244,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979794025421143,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9441071152687073,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443512558937073,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9463033080101013,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9469476342201233,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780725836753845,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815011024475098,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827064871788025,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831029772758484,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849357604980469,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863700270652771,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866855144500732,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987862765789032,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880084991455078,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881923794746399,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919127821922302,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919906258583069,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992071807384491,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9802572131156921,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810832738876343,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983765184879303,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881438612937927,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901432394981384,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904407262802124,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934489727020264,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938297271728516,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948173761367798,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950340986251831,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994924783706665,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954949617385864,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955127835273743,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960339069366455,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975778460502625,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978936910629272,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980992078781128,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998475193977356,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998954176902771,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9703905582427979,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710678458213806,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.975927472114563,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775263667106628,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850654006004333,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862971305847168,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883233308792114,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921771287918091,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929518699645996,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923434853553772,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933462142944336,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960786700248718,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963791370391846,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975574016571045,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977429509162903,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980450868606567,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985678791999817,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9793965816497803,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801428318023682,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823439121246338,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867308139801025,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893315434455872,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895222187042236,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992841362953186,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930751919746399,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936861395835876,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940101504325867,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947925209999084,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952341914176941,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953778386116028,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957927465438843,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99747234582901,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976552724838257,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998105525970459,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982734322547913,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988120198249817,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.9681962728500366,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689252376556396,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9742778539657593,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760537147521973,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839421510696411,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852699041366577,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875349998474121,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915992021560669,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992448091506958,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917809367179871,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928660988807678,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958334565162659,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961573481559753,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974536299705505,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997660219669342,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980112314224243,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986050724983215,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9768656492233276,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774819016456604,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801135063171387,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852016568183899,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878225326538086,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878743290901184,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919320940971375,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920068979263306,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932944774627686,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935595393180847,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941152930259705,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947041869163513,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947695136070251,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954301118850708,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970114231109619,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973185062408447,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976526498794556,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979603886604309,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998720645904541,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9649899005889893,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9657994508743286,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9717763662338257,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738039970397949,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982296347618103,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837513566017151,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986298680305481,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906942248344421,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916285276412964,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909341931343079,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921115636825562,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953876733779907,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957159757614136,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971457123756409,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973434805870056,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977266788482666,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983285665512085,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.972147524356842,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972661554813385,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766034483909607,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815959930419922,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848681688308716,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985066294670105,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892409443855286,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896342158317566,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913256764411926,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912533760070801,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929389357566833,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934121370315552,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993621826171875,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939786195755005,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964055418968201,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966776967048645,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970211386680603,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975183010101318,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984278678894043,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.9632079005241394,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9640237092971802,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9704439043998718,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9726211428642273,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813827872276306,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9828854203224182,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856656193733215,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902184009552002,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912081956863403,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904671907424927,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917080998420715,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951792359352112,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955222010612488,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970564246177673,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972717761993408,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976998567581177,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998352587223053,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.9683395624160767,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687543511390686,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733739495277405,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784289598464966,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829567074775696,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834290742874146,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875460267066956,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878270626068115,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988595187664032,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888255596160889,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915862083435059,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924848675727844,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925302267074585,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931311011314392,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957960844039917,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962929487228394,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996575653553009,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974336624145508,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984481930732727,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9595074653625488,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604513645172119,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9676089882850647,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700256586074829,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794918298721313,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811568260192871,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842206835746765,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892106056213379,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903239607810974,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894925355911255,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908747673034668,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946851134300232,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950800538063049,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967525005340576,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969522953033447,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974217414855957,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981228709220886,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.9674526453018188,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684699177742004,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9729474186897278,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781985282897949,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826101660728455,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982745885848999,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987148106098175,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874547123908997,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894797205924988,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898569583892822,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917556047439575,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926818013191223,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926078915596008,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935287833213806,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955477714538574,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962506890296936,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962961077690125,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972893595695496,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982264041900635,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9571065902709961,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9581477046012878,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655143022537231,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679955840110779,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9782941341400146,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9800519943237305,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832215905189514,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886151552200317,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897580146789551,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888927936553955,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903529286384583,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944030046463013,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948177337646484,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966138601303101,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968137145042419,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973026514053345,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980776906013489,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.95650315284729,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9562374949455261,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9632623791694641,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.968290388584137,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801536798477173,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807611703872681,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850077033042908,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857496619224548,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874382019042969,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877081513404846,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899751543998718,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906693696975708,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908943772315979,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914349317550659,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943880438804626,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995281994342804,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952034950256348,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965448975563049,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977513551712036,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9541553258895874,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9552403092384338,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630976915359497,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9657444953918457,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9767487645149231,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786355495452881,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820358157157898,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877897500991821,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890356063842773,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880921840667725,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989658534526825,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939965009689331,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944344162940979,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963566660881042,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996630847454071,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971669316291809,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980174899101257,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.9644036293029785,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655157327651978,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9693030714988708,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764342308044434,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814243316650391,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815596342086792,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876139163970947,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878385663032532,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889872074127197,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893136620521545,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903632998466492,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912707209587097,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913525581359863,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923175573348999,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949055314064026,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955431222915649,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958861470222473,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965891242027283,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978675246238708,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9529584050178528,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9540717601776123,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621211886405945,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648403525352478,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761242866516113,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780545234680176,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98155677318573,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874475002288818,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887403249740601,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877658486366272,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893764853477478,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938425421714783,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994283139705658,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962729215621948,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964813590049744,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970231056213379,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978665113449097,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9600362777709961,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607061743736267,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9659425616264343,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731496572494507,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798722863197327,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803631901741028,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861832857131958,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867093563079834,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881035089492798,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890527725219727,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895679950714111,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904294013977051,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905115962028503,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914331436157227,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950063228607178,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954233169555664,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961314797401428,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996680736541748,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980881810188293,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9499072432518005,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95108562707901,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9597283601760864,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9626637697219849,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744985103607178,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976579487323761,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803855419158936,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865930676460266,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879910945892334,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869414567947388,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886695742607117,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934355616569519,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939165711402893,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996040403842926,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962763786315918,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968717694282532,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977774024009705,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9539197087287903,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9553838968276978,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9594288468360901,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9664390683174133,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772546887397766,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776601791381836,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840968251228333,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845147728919983,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861506819725037,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869838356971741,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98884516954422,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899385571479797,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899750351905823,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911490678787231,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944490790367126,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952282309532166,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956145882606506,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966425895690918,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978920221328735,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.9482684135437012,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9494894742965698,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9583399295806885,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9613639116287231,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9736423492431641,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97577965259552,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797058701515198,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861550331115723,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876024723052979,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865157604217529,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883049726486206,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932431578636169,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937629699707031,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959641098976135,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962319135665894,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968641400337219,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997849702835083,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9537039995193481,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9551056027412415,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.961058497428894,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690839648246765,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762037992477417,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766023755073547,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833641052246094,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837989807128906,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843851327896118,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849646091461182,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873390197753906,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888483285903931,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988664448261261,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899761080741882,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931254386901855,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938584566116333,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941686391830444,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952196478843689,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976822733879089,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9443305730819702,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456124305725098,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9550294876098633,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9583097100257874,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.971528172492981,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738662242889404,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780750274658203,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850043654441833,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986579418182373,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854276776313782,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873660802841187,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926769733428955,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932425618171692,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955973029136658,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959168434143066,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965938329696655,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976510405540466,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9490907788276672,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9504586458206177,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568060636520386,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647178053855896,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731794595718384,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738990664482117,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806018471717834,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814165234565735,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844563007354736,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845964312553406,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868177175521851,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988161027431488,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880752563476562,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892337918281555,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933468103408813,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942638874053955,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945242404937744,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956178665161133,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968896508216858,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9413588047027588,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9427181482315063,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526336789131165,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9561070203781128,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700074195861816,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9724559187889099,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769169092178345,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841926693916321,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985858142375946,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846493005752563,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866793751716614,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992250919342041,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928510189056396,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953077435493469,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956532120704651,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963626861572266,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974408745765686,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.949528694152832,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9510098099708557,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9562175273895264,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9645105004310608,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9735353589057922,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741403460502625,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809249639511108,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815594553947449,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839516878128052,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98442542552948,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867801070213318,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877983331680298,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988017737865448,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890031218528748,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932622909545898,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940621852874756,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994383692741394,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955748915672302,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965977668762207,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9364281892776489,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9378352165222168,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9486426711082458,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9525243043899536,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674035310745239,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700629115104675,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749814867973328,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827641844749451,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846066236495972,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833109974861145,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855165481567383,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915716052055359,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922062158584595,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948853254318237,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952430725097656,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960197806358337,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971535205841064,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.9428128004074097,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434297680854797,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9510492086410522,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9578351378440857,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9703965187072754,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708772301673889,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9769477844238281,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778797030448914,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797470569610596,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9783811569213867,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850874543190002,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870269894599915,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862444996833801,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98827064037323,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924985766410828,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934466481208801,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935574531555176,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995972752571106,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997107744216919,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9340739250183105,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9355401992797852,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9464058876037598,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9503659009933472,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662185907363892,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689612984657288,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739344716072083,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821590185165405,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840452075004578,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827384948730469,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850219488143921,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912996292114258,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99201500415802,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994774580001831,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951696991920471,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995976448059082,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972310066223145,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.950366735458374,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9515746831893921,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9577349424362183,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9653101563453674,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9743497967720032,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750314950942993,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813660979270935,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822225570678711,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845434427261353,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984894871711731,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855368137359619,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869506359100342,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867641925811768,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882293939590454,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920621514320374,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933335781097412,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929977655410767,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949458241462708,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962235689163208,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9306991100311279,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9322283864021301,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9435347318649292,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9477280378341675,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644246101379395,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9673413038253784,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725342392921448,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811256527900696,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831628203392029,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817925691604614,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842028617858887,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990786612033844,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915223717689514,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943945407867432,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948062300682068,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956315159797668,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968816637992859,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.9464938044548035,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9471202492713928,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9588847756385803,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656490087509155,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.974885880947113,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758293628692627,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813731908798218,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824540615081787,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847753643989563,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851590991020203,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9865814447402954,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877563714981079,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876573085784912,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887695908546448,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932838678359985,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939733147621155,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942378997802734,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956232309341431,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972735643386841,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.9318183660507202,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9332734942436218,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9443495273590088,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9485031366348267,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9649341106414795,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678182005882263,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9729592204093933,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981437623500824,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834232926368713,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820895791053772,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844605326652527,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909387230873108,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917105436325073,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994523823261261,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948991537094116,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957163333892822,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969614148139954,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9501757621765137,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.951337993144989,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9576956629753113,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655025005340576,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721558690071106,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9730760455131531,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9783192276954651,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795851111412048,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829275012016296,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845775961875916,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859683513641357,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874527454376221,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869758486747742,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886669516563416,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933426380157471,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945386052131653,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944134950637817,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962874054908752,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973529577255249,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.92940354347229,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9308879971504211,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9422849416732788,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9466702938079834,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635935425758362,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9666082859039307,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9719645380973816,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980715274810791,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827926158905029,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814193248748779,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838900566101074,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906280636787415,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99144446849823,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943639039993286,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947951436042786,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956643581390381,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969942569732666,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.self_attn": [
+ {
+ "accuracy": 0.951934814453125,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9528383016586304,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596495032310486,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660060405731201,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9767582416534424,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772152900695801,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827438592910767,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830654263496399,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852290749549866,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850398302078247,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885734915733337,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894993305206299,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896097779273987,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990520715713501,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939936399459839,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994758129119873,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994904100894928,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965277314186096,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979313015937805,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.40.mlp": [
+ {
+ "accuracy": 0.9279307126998901,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9294323325157166,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9409689903259277,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.945414662361145,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9628061652183533,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9658695459365845,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9712775945663452,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802464842796326,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823906421661377,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809815883636475,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835128784179688,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990355372428894,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912064671516418,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941500425338745,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945650696754456,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954215884208679,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996740460395813,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.self_attn": [
+ {
+ "accuracy": 0.9519467949867249,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9534733295440674,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96002197265625,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9675674438476562,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750229120254517,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9761711955070496,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818697571754456,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834392666816711,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851945638656616,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9854916334152222,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987498939037323,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886382222175598,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886749982833862,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989924967288971,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993536651134491,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946059584617615,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994655430316925,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964364767074585,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978652000427246,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.41.mlp": [
+ {
+ "accuracy": 0.9284889101982117,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9299524426460266,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9410016536712646,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9454147815704346,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630385637283325,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660794138908386,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9713107347488403,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802790880203247,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823992252349854,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810753464698792,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835746884346008,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903516173362732,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911776781082153,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940664172172546,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945459961891174,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953678250312805,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966540336608887,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.self_attn": [
+ {
+ "accuracy": 0.9616678953170776,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9622650742530823,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697475433349609,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744482636451721,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803887009620667,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818572402000427,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849295020103455,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866987466812134,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882400631904602,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885031580924988,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905753135681152,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914529919624329,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914224743843079,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923288226127625,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947748780250549,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957057237625122,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954434037208557,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972034096717834,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985409379005432,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.42.mlp": [
+ {
+ "accuracy": 0.9326208233833313,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9340015053749084,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9439868330955505,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9480090737342834,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9652714729309082,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9680780172348022,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9728371500968933,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981497585773468,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834634065628052,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822733998298645,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845874905586243,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991008460521698,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918063282966614,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945603013038635,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949568510055542,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957152009010315,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969934225082397,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.self_attn": [
+ {
+ "accuracy": 0.9563398957252502,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9564187526702881,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655157327651978,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695358276367188,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801672697067261,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811917543411255,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845325946807861,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856567978858948,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878915548324585,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885661005973816,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898520708084106,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912444949150085,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990538477897644,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921171069145203,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945836663246155,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995683491230011,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952531456947327,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968882203102112,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997617244720459,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.43.mlp": [
+ {
+ "accuracy": 0.9371635913848877,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9384962916374207,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9473474025726318,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9508899450302124,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9676471948623657,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9702790975570679,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9744744896888733,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982794463634491,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845942258834839,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834722280502319,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856254458427429,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915793538093567,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923242926597595,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948658347129822,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952015280723572,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995855987071991,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970408082008362,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.self_attn": [
+ {
+ "accuracy": 0.9613963961601257,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9629186391830444,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9675442576408386,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734923839569092,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794407486915588,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801721572875977,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845354557037354,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856245517730713,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988754391670227,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886866807937622,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901577234268188,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908368587493896,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910938739776611,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917099475860596,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951516389846802,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958016276359558,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961161613464355,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971333742141724,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982826709747314,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.44.mlp": [
+ {
+ "accuracy": 0.9418177008628845,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9430515766143799,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9510312676429749,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.95417720079422,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9701164960861206,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972521960735321,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976307213306427,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841784238815308,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858052134513855,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847455620765686,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867299795150757,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922428131103516,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929569959640503,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953157305717468,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955384731292725,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961211085319519,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972298741340637,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.self_attn": [
+ {
+ "accuracy": 0.9671798944473267,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678220748901367,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734936952590942,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9770593047142029,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834128618240356,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841364026069641,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987387478351593,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882473349571228,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891912937164307,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894239902496338,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918438196182251,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926326274871826,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925499558448792,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932753443717957,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951413869857788,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995730996131897,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956656098365784,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996568500995636,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983518719673157,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.45.mlp": [
+ {
+ "accuracy": 0.9471739530563354,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9483099579811096,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9554425477981567,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.958203136920929,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9728526473045349,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750807881355286,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784308671951294,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856546521186829,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871464371681213,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861313104629517,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879617094993591,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929385185241699,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936060309410095,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957177639007568,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996102511882782,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966410398483276,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977436065673828,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.self_attn": [
+ {
+ "accuracy": 0.967778205871582,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683685302734375,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9742239117622375,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784662127494812,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834921956062317,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844179153442383,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872710108757019,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885231256484985,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904177784919739,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903071522712708,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919667840003967,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927451014518738,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928160309791565,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934120774269104,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959463477134705,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965716004371643,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966251850128174,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977062940597534,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986270666122437,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.46.mlp": [
+ {
+ "accuracy": 0.9499741792678833,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9510411620140076,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9577375650405884,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9602895379066467,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9743403196334839,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764187335968018,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795705080032349,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986502468585968,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878860116004944,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869211912155151,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886406660079956,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933938980102539,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940292239189148,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960886240005493,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962384104728699,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967313408851624,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977487921714783,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.self_attn": [
+ {
+ "accuracy": 0.9691627621650696,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705380797386169,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750425219535828,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786255359649658,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847984313964844,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857648611068726,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888273477554321,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898872375488281,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910038709640503,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912682771682739,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923546314239502,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931882619857788,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931609034538269,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939625859260559,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961873888969421,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966850876808167,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967454671859741,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978030920028687,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984891414642334,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.47.mlp": [
+ {
+ "accuracy": 0.9524471163749695,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9534656405448914,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9597987532615662,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621769189834595,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756070375442505,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775834679603577,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9805474281311035,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871713519096375,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884790182113647,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875375628471375,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891744256019592,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993657112121582,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942660331726074,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961767792701721,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965355396270752,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970147609710693,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998050332069397,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.48.self_attn": [
+ {
+ "accuracy": 0.9734237790107727,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9739310145378113,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785552620887756,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822705984115601,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862684607505798,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867746829986572,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900519251823425,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905380606651306,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924371242523193,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920732975006104,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932309985160828,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939243197441101,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940153360366821,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946439862251282,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996534526348114,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99701327085495,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971383810043335,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979796409606934,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987788796424866,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.48.mlp": [
+ {
+ "accuracy": 0.953971803188324,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9549679756164551,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.961044430732727,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633058309555054,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764280915260315,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9783238172531128,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981164276599884,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987646758556366,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888959527015686,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879851937294006,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895592927932739,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939338564872742,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945231676101685,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964223504066467,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966510534286499,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971097707748413,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981211423873901,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.49.self_attn": [
+ {
+ "accuracy": 0.9728277921676636,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9737898111343384,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780478477478027,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815722107887268,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861900806427002,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868113994598389,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900490641593933,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909396767616272,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913989305496216,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916397929191589,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931843876838684,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938640594482422,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993905246257782,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994601845741272,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966884255409241,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971314668655396,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972712397575378,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979456663131714,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986264109611511,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.49.mlp": [
+ {
+ "accuracy": 0.9555652141571045,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565215706825256,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9623543620109558,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9645048379898071,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772310853004456,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790746569633484,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817993640899658,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880719780921936,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892885684967041,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883970618247986,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899222254753113,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941384792327881,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947089552879333,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965408444404602,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967032074928284,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971340298652649,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980754852294922,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.50.self_attn": [
+ {
+ "accuracy": 0.9749696254730225,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9757494330406189,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9793223738670349,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826341867446899,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873611330986023,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879586696624756,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909205436706543,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916771054267883,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928498864173889,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931950569152832,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939725995063782,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944993853569031,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994723916053772,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952370524406433,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969673752784729,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974410533905029,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975789189338684,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983395934104919,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990624785423279,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.50.mlp": [
+ {
+ "accuracy": 0.9563117623329163,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9572614431381226,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9630103707313538,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651142358779907,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776173233985901,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794315099716187,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821071028709412,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882876873016357,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894927144050598,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886007905006409,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901062250137329,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994263231754303,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948298931121826,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966446757316589,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969107508659363,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973578453063965,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983831644058228,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.51.self_attn": [
+ {
+ "accuracy": 0.9723612666130066,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9732444286346436,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774115681648254,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814422726631165,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855733513832092,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858569502830505,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900569319725037,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902735948562622,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920966625213623,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925727844238281,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931262135505676,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938168525695801,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941162467002869,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947735667228699,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967669248580933,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972162842750549,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974873661994934,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981306195259094,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988936185836792,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.51.mlp": [
+ {
+ "accuracy": 0.9577786326408386,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9586864709854126,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642301201820374,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662446975708008,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978367030620575,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801216721534729,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9826943278312683,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988665759563446,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898231029510498,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98896324634552,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990408182144165,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943976402282715,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949347972869873,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966541528701782,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996851921081543,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972554445266724,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981426000595093,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.52.self_attn": [
+ {
+ "accuracy": 0.9806666970252991,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811267256736755,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843558669090271,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98642897605896,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899234771728516,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902141690254211,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925476312637329,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930080771446228,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940158128738403,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941444396972656,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995101273059845,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956563115119934,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956219792366028,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996209979057312,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975281953811646,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979854226112366,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979499578475952,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986925721168518,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992536306381226,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.52.mlp": [
+ {
+ "accuracy": 0.959108293056488,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9600082039833069,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9652302265167236,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9671441316604614,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790430665016174,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807626605033875,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831824898719788,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988990068435669,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901213645935059,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989285945892334,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906972050666809,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945380091667175,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950542449951172,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967019557952881,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996957004070282,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973391890525818,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982107877731323,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.53.self_attn": [
+ {
+ "accuracy": 0.9779462218284607,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787406921386719,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827246069908142,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846527576446533,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890528321266174,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896629452705383,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917614459991455,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924411177635193,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930567741394043,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932515621185303,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947630167007446,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995223879814148,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955512285232544,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959713220596313,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974278211593628,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979852437973022,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978631138801575,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987279176712036,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991778135299683,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.53.mlp": [
+ {
+ "accuracy": 0.9607957005500793,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9616606831550598,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9665985107421875,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684074521064758,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799195528030396,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815429449081421,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838400483131409,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894484877586365,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990523636341095,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897304177284241,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910717606544495,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947601556777954,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952478408813477,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968311786651611,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970794916152954,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974414110183716,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982792735099792,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.54.self_attn": [
+ {
+ "accuracy": 0.9808214902877808,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814398884773254,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852087497711182,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869551062583923,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902269244194031,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907662868499756,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930158257484436,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937596917152405,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948115348815918,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949373602867126,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953887462615967,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958353042602539,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961305260658264,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964646100997925,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977927803993225,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981395602226257,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981768131256104,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988007545471191,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992905259132385,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.54.mlp": [
+ {
+ "accuracy": 0.9617258906364441,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9625528454780579,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9672908186912537,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.969018280506134,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804181456565857,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820070266723633,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841914772987366,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897068738937378,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907723665237427,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899919033050537,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913070201873779,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949122667312622,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953869581222534,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969401955604553,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971950054168701,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975465536117554,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983964562416077,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.55.self_attn": [
+ {
+ "accuracy": 0.9800737500190735,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981036901473999,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848119616508484,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873806238174438,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989960789680481,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905515313148499,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934605360031128,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941214323043823,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948420524597168,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950358867645264,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951728582382202,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955883622169495,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960614442825317,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964731931686401,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997783362865448,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981345534324646,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982259273529053,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987561702728271,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992566704750061,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.55.mlp": [
+ {
+ "accuracy": 0.9627878069877625,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9636444449424744,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9679937362670898,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695963263511658,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809333682060242,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982501745223999,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844860434532166,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898688197135925,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908896088600159,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901388883590698,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914180040359497,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948018789291382,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952191710472107,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965977072715759,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970895051956177,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973890781402588,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981272220611572,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.56.self_attn": [
+ {
+ "accuracy": 0.9752034544944763,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9763630032539368,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9802281856536865,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839984774589539,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877219200134277,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987982451915741,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992669403553009,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930444359779358,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936023354530334,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938399195671082,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939477443695068,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943765997886658,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951106905937195,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954420924186707,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997350811958313,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997619092464447,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980720281600952,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998394250869751,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990727305412292,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.56.mlp": [
+ {
+ "accuracy": 0.9657200574874878,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9665122628211975,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9702794551849365,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.971675455570221,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823814630508423,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983855664730072,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855334758758545,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905179142951965,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914619326591492,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908064007759094,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919626712799072,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950956106185913,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953182935714722,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966782927513123,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970911145210266,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997326672077179,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979304075241089,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.57.self_attn": [
+ {
+ "accuracy": 0.986625611782074,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870643019676208,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894611835479736,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913647174835205,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933096170425415,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993571937084198,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955630302429199,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960556030273438,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964077472686768,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965390563011169,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967729449272156,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970270991325378,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973542094230652,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975465536117554,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984753131866455,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985825419425964,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987717866897583,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989755153656006,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999299168586731,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.57.mlp": [
+ {
+ "accuracy": 0.9685173630714417,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9692901968955994,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725824594497681,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9738173484802246,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983854353427887,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852574467658997,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867122173309326,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912779927253723,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921209216117859,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915479421615601,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926221370697021,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953883290290833,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956555962562561,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967787265777588,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972021579742432,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997399091720581,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979239106178284,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.58.self_attn": [
+ {
+ "accuracy": 0.9861055612564087,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986365556716919,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9886723160743713,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905329346656799,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929289817810059,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933069944381714,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953204393386841,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957102537155151,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962227940559387,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963284134864807,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967322945594788,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969680309295654,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972233176231384,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974170327186584,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984096884727478,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984683394432068,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987388849258423,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988744854927063,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991764426231384,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.58.mlp": [
+ {
+ "accuracy": 0.969566822052002,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9704891443252563,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731537103652954,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9741712212562561,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845026135444641,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9857882261276245,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869593977928162,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913132190704346,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919530153274536,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916771650314331,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992486298084259,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950717091560364,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950191974639893,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961662888526917,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996845006942749,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969819188117981,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973827600479126,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.59.self_attn": [
+ {
+ "accuracy": 0.9904834628105164,
+ "total_bits": 248534016,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907311201095581,
+ "total_bits": 255874048,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921225905418396,
+ "total_bits": 261933440,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936809539794922,
+ "total_bits": 311705344,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953365921974182,
+ "total_bits": 368585216,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955551624298096,
+ "total_bits": 368817280,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969096183776855,
+ "total_bits": 473442816,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971984028816223,
+ "total_bits": 473674880,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99711012840271,
+ "total_bits": 477587712,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970989227294922,
+ "total_bits": 484485120,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977273344993591,
+ "total_bits": 486257792,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977944493293762,
+ "total_bits": 490137856,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997983455657959,
+ "total_bits": 496260096,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980278611183167,
+ "total_bits": 503130112,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987089037895203,
+ "total_bits": 610019840,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987083673477173,
+ "total_bits": 620570624,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998917818069458,
+ "total_bits": 708323840,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989064931869507,
+ "total_bits": 734046208,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993255138397217,
+ "total_bits": 943204864,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.59.mlp": [
+ {
+ "accuracy": 0.9641786813735962,
+ "total_bits": 983263488,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9657571911811829,
+ "total_bits": 1019963648,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9673142433166504,
+ "total_bits": 1139319296,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9680131077766418,
+ "total_bits": 1278779904,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820795655250549,
+ "total_bits": 1439479680,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834616780281067,
+ "total_bits": 1479461888,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840754270553589,
+ "total_bits": 1591146112,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896513223648071,
+ "total_bits": 1819203328,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900665879249573,
+ "total_bits": 1846070272,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898846745491028,
+ "total_bits": 1872541568,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907495975494385,
+ "total_bits": 1912523776,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993096113204956,
+ "total_bits": 2305603456,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932379126548767,
+ "total_bits": 2345585664,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994042694568634,
+ "total_bits": 2671556480,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950992465019226,
+ "total_bits": 2763141888,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951664805412292,
+ "total_bits": 3012702976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995543897151947,
+ "total_bits": 3536990976,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "last_module_idx": 122
+}
\ No newline at end of file
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..1d3fe2b6f40a6cdf01ef2774e0c6858a64050302
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,550 @@
+{
+ "metadata": {
+ "total_size": 68777834496
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00015-of-00015.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.12.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.12.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.16.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.16.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.20.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.20.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.25.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.25.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.29.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.29.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.29.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.3.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.33.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.33.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.38.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.38.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.38.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.42.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.42.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.42.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.46.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.46.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.51.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.51.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.51.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.55.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.55.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.55.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00015-of-00015.safetensors",
+ "model.layers.59.mlp.down_proj.weight": "model-00015-of-00015.safetensors",
+ "model.layers.59.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.7.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
+ "model.norm.weight": "model-00015-of-00015.safetensors"
+ }
+}
diff --git a/output-00001-of-00003.safetensors b/output-00001-of-00003.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..09f66324cfad10f133cd7fb1c367ac256fbf59a3
--- /dev/null
+++ b/output-00001-of-00003.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da6250319eb9f0e7b49d0b3a3600fb11fb95092afcee139a8f050047161252ca
+size 8557319392
diff --git a/output-00002-of-00003.safetensors b/output-00002-of-00003.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c7e64637997a08080b4a5fa01c3119de2b9f2d1e
--- /dev/null
+++ b/output-00002-of-00003.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b550f8963a5b325c8146ebd60283dd1f86534922c6cc5c6ca98156a39996db92
+size 8535512008
diff --git a/output-00003-of-00003.safetensors b/output-00003-of-00003.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c408592cda21ec8f66a483d3d49c45b757a860d
--- /dev/null
+++ b/output-00003-of-00003.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc70f87f2ccb2acc852032866731ca79e672280b154baacdce7b7bf5b7140754
+size 3659894776
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..14761dcf1466dc232bd41de9c21d4c617b15755e
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,24 @@
+{
+ "bos_token": {
+ "content": " ": 53,
+ "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {}
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "": 1,
+ "": 2,
+ "<|Human|>": 3,
+ "<|Assistant|>": 4,
+ "<|System|>": 5,
+ "<|im_start|>": 6,
+ "<|im_end|>": 7,
+ "<|im_sep|>": 8,
+ "<|reserved003|>": 9,
+ "<|reserved004|>": 10,
+ "<|reserved005|>": 11,
+ "<|reserved006|>": 12,
+ "<|reserved007|>": 13,
+ "": 32,
+ "": 33,
+ "
": 34,
+ "": 35,
+ "": 36,
+ "
": 37,
+ "": 38,
+ "": 39,
+ "
": 40,
+ "": 41,
+ "": 42,
+ "
": 43,
+ "": 44,
+ "": 45,
+ "
": 46,
+ "
": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "": 52,
+ "