diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..581c7ed6694dbd19458d8bea49ac769bd0451718
--- /dev/null
+++ b/README.md
@@ -0,0 +1,78 @@
+---
+license: apache-2.0
+language:
+ - en
+ - fr
+ - de
+ - es
+ - it
+ - pt
+ - ru
+ - zh
+ - ja
+quantized_by: bartowski
+pipeline_tag: text-generation
+---
+
+## Exllama v2 Quantizations of mini-magnum-12b-v1.1
+
+Using turboderp's ExLlamaV2 v0.1.8 for quantization.
+
+The "main" branch contains only the measurement.json; download one of the other branches for the model (see below).
+
+Each branch contains a quantization at a different bits per weight, with the main one containing only the measurement.json for further conversions.
+
+Conversion was done using the default calibration dataset.
+
+Default arguments were used, except when the bits per weight is above 6.0; in that case the lm_head layer is quantized at 8 bits per weight instead of the default 6.
+
+Original model: https://huggingface.co/intervitens/mini-magnum-12b-v1.1
+
+
+8.0 bits per weight
+
+6.5 bits per weight
+
+5.0 bits per weight
+
+4.25 bits per weight
+
+3.5 bits per weight
+
+
+## Download instructions
+
+With git:
+
+```shell
+git clone --single-branch --branch 6_5 https://huggingface.co/bartowski/mini-magnum-12b-v1.1-exl2
+```
+
+With huggingface hub (credit to TheBloke for instructions):
+
+```shell
+pip3 install huggingface-hub
+```
+
+To download the `main` branch (only useful if you only care about measurement.json) to a folder called `mini-magnum-12b-v1.1-exl2`:
+
+```shell
+mkdir mini-magnum-12b-v1.1-exl2
+huggingface-cli download bartowski/mini-magnum-12b-v1.1-exl2 --local-dir mini-magnum-12b-v1.1-exl2
+```
+
+To download from a different branch, add the `--revision` parameter:
+
+Linux:
+
+```shell
+mkdir mini-magnum-12b-v1.1-exl2-6_5
+huggingface-cli download bartowski/mini-magnum-12b-v1.1-exl2 --revision 6_5 --local-dir mini-magnum-12b-v1.1-exl2-6_5
+```
+
+Windows (which apparently doesn't like _ in folders sometimes?):
+
+```shell
+mkdir mini-magnum-12b-v1.1-exl2-6.5
+huggingface-cli download bartowski/mini-magnum-12b-v1.1-exl2 --revision 6_5 --local-dir mini-magnum-12b-v1.1-exl2-6.5
+```
diff --git a/measurement.json b/measurement.json
new file mode 100644
index 0000000000000000000000000000000000000000..9cf9e34e56e2b1d4898c490cef637f64ba69adb0
--- /dev/null
+++ b/measurement.json
@@ -0,0 +1,78047 @@
+{
+ "measurement": {
+ "model.layers.0.self_attn": [
+ {
+ "accuracy": 0.8861993739479467,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9027069116893568,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9155824498126381,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9420169905612343,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.948323550977205,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9505465313007957,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9633521908207944,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9650674430947555,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687849879264832,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710030273387307,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.97573931123081,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773431451697099,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789061122818997,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808355475726881,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880589278120744,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902086485373346,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902729556748742,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940004144844256,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973414514802004,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.0.mlp": [
+ {
+ "accuracy": 0.9157888387378893,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.921570106556541,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9302359944895694,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9308528147245708,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803961625224665,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837989069913563,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853551434843164,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892030899461947,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932142198085785,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912673431007486,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939625282820902,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965364395787841,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969669047154879,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979947238768402,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981925507125101,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986566089485821,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991905634830657,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.self_attn": [
+ {
+ "accuracy": 0.9747781079066428,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759079155169035,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9795585227640051,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840788276571977,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850583986232155,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986634163480056,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876531762512106,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893138126323098,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923299896089655,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926220594268096,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934307650515908,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943258154549097,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939614052050992,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949229669414068,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99642471655419,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974997302419261,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969249834355555,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988413009988634,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991948357048003,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.1.mlp": [
+ {
+ "accuracy": 0.9770557864716178,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776404876458017,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814974195078799,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982762939051578,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884254077547475,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893457215083273,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910797476768494,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939702188498095,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945331439375877,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941173148782629,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994847477266663,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970033337411127,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974324356176352,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983736712877688,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984733709379247,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988209547376946,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995593719772602,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.self_attn": [
+ {
+ "accuracy": 0.9625284922750372,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635801158453289,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972254825265784,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765779156433908,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790249341412595,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818554285325503,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815040920910082,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844483388097662,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989184414085589,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895803834262648,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99056954054456,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931140910638007,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912261029607371,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937450681862078,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947221173267615,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968256487658149,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951193705201149,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986077057883928,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987148653323713,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.2.mlp": [
+ {
+ "accuracy": 0.9597314502063551,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9605052345677426,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9568213569490533,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9571818176068758,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788379826043782,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988982911172666,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899004940923891,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831791426006117,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910958409309387,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874635009389174,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930437254278284,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955894284342465,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944526831570425,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959357180877736,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983822589642123,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985719653532693,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987526141891354,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.self_attn": [
+ {
+ "accuracy": 0.9879614687279651,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887772900493521,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990348817486512,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927816249822315,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939748629143363,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994101054574314,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956154078245163,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957218064289344,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961140210691252,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963581875750893,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997013224975059,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972627313905641,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973256164475491,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975866863602086,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985189388849234,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987676503430856,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988421959508407,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993649209408384,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996860792958423,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.3.mlp": [
+ {
+ "accuracy": 0.9802224479223552,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807959873425333,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841163801519495,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851736222442827,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899860291104567,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990789173465026,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922558916242499,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947601062686819,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952396856326806,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948924983802595,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955316185951233,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973963548086191,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997771581732913,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985814922930378,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986728181768405,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989819121792128,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9996237833494026,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.self_attn": [
+ {
+ "accuracy": 0.9838527723362571,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845198345811743,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870521739909524,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900124237725609,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909704233470716,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914438450022748,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927143478079846,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993329580677183,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946969367171589,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948219782427737,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956163548325238,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962678249729308,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960127319944533,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966709307934108,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977835703052973,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983305507584622,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981281518151885,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991448649058217,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994906617426559,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.4.mlp": [
+ {
+ "accuracy": 0.975142555801492,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759100361874229,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801600528390784,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814635577954745,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874362427937357,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884659193064037,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903009384870529,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934379874091399,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940325845228998,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935986631010708,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944020219539341,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967379620984981,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972094658174013,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982243160668173,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99833192186136,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987192982317585,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999525702313373,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.self_attn": [
+ {
+ "accuracy": 0.9843348735257199,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850004566343207,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870346886546988,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903069318909394,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991737898243101,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920510775164554,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938550175804841,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942189102110109,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950938467916689,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953834751718923,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959098775135843,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963964572862575,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963908215102396,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968458084683669,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979839893548113,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983821341669873,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984075858404762,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99911155502655,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9995764051809123,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.5.mlp": [
+ {
+ "accuracy": 0.9698219048349481,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707501091455158,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758880797185396,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774523408789384,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847321573056673,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859827882365176,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988193712736431,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920010354958082,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927335883441725,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922214283754951,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931952169066981,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960381678844753,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966092890030459,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978389720383444,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979800366257366,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984448631539157,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994259051194316,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.self_attn": [
+ {
+ "accuracy": 0.9797204434871674,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806050658226013,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982622681479705,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872010742363176,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896693253203442,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899289529574545,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928998719704779,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932126277371457,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935667506958309,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937015697360039,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947485413990522,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951039521317733,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953182708275946,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956618614102665,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974363769747709,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978250394526281,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980864548369458,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987236881922734,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994995633238241,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.6.mlp": [
+ {
+ "accuracy": 0.9641801620784559,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9653433906404596,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9714955122847306,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733498221949527,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819622133907518,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834335587526623,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860524001874422,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990555487181011,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914031679693022,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908087104558945,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919623026722356,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953214298737677,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959939255526191,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974418868751902,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976128249576217,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9981606355623195,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993219215324834,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.self_attn": [
+ {
+ "accuracy": 0.9756527605809664,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766638294646615,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791543436677832,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848917942298087,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875623332826715,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878695144465095,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991774740972017,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921638048008868,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928840900722303,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993301596296461,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937279369486006,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942910945729205,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994543232023716,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950843480856795,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996981575104751,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975141463311095,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978066623995179,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986452055408767,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9994272651444924,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.7.mlp": [
+ {
+ "accuracy": 0.9610076734894201,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9622226577056081,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9688299706107691,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707918198485124,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803748820957384,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819647152172891,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847479744961387,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897683929455908,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906498530977651,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899837696238568,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912377305720982,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948916062712669,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956306999451235,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997216273099184,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973870368772432,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979784751408979,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992574067590269,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.self_attn": [
+ {
+ "accuracy": 0.9738372783911856,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9752110133045598,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9781653692847804,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838894950716119,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864098413994438,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868037559484181,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905525132229454,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910722610197569,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923256376856252,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928902058224929,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931361345868361,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939548875156202,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942325949668884,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946778534274352,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967466051641264,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973576782565368,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976461327781803,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998511784857041,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9993437042361811,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.8.mlp": [
+ {
+ "accuracy": 0.9584072169504667,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9596678708728991,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9666492405690645,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9687599131935521,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789275919136248,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9806658296208632,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836361596458837,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889782078956303,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899527399163497,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989231778602851,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905891794907419,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945033016173463,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953007686295008,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969976311059374,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971939535125306,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978333266549989,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992026123953494,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.self_attn": [
+ {
+ "accuracy": 0.9687983926973844,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700566279260736,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9727770717520463,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799098874393263,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837110277853514,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840396485830608,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892109248198961,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896116766490435,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906673831375021,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913566379170669,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.991751787693877,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924202718232807,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992731082596277,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993355327530911,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957696917025667,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966523906117991,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996818898735862,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980398002815875,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991535402246212,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.9.mlp": [
+ {
+ "accuracy": 0.9581836587504337,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9594866037368774,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663955318300348,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684719851142481,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784271717071533,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980490715880143,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834056888755999,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887824074218148,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989612592678321,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890436694810265,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905043048293967,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944613646519812,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952534609719327,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969458829023337,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971515101037527,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977871583480584,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991137420660571,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.self_attn": [
+ {
+ "accuracy": 0.968756399656597,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697988943049782,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725370312991896,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801396853045413,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841477619974237,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844495042374259,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900720817478079,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904513194372779,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895476596920114,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901517449240935,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918379015044162,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924490043991491,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928768849686572,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934640613041426,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960096121618622,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996737971117622,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997237433532351,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998132262574999,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992699454979677,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.10.mlp": [
+ {
+ "accuracy": 0.9560421579762509,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574175759365684,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964729230654867,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966893334137766,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776929914951324,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796027274508226,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982646644115448,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988293490911785,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989315611751456,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885669107499876,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900266026195726,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941402156102029,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950079698311655,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967750118751275,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996999917649909,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976893063438567,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991394667641113,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.self_attn": [
+ {
+ "accuracy": 0.968060198583101,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689608059431377,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9715681044678939,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785929165388408,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832227151644858,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983438734945498,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887808075076655,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889671480969379,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897781971253847,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907171702698657,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917355305270145,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918740776024366,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922294938250592,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930663124511117,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957434017407266,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965763966503897,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968750476837158,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979326056414529,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991996444663719,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.11.mlp": [
+ {
+ "accuracy": 0.9558642632082889,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9572393486374303,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9643692970275879,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9664339454550492,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776644989063865,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796499170755085,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825349308942494,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883037985939729,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893519211756555,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885186796125612,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900332971623069,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9940978923910543,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950058491606462,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967543939618688,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969639329141692,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976172333485201,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99912727477127,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.self_attn": [
+ {
+ "accuracy": 0.9692779936288533,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9701270994387174,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731957943815934,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979852064659721,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836353110639673,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9841625078728324,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884561872796008,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989043304794713,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904562140765943,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911651972093081,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919362217187881,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926334745005557,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929889495435514,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934513427709278,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961025471750059,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967438817808503,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971204794159061,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978973275344623,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9992164139586844,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.12.mlp": [
+ {
+ "accuracy": 0.9561777083497298,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9574892489533675,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642992898037559,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9663191814171641,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777501790147078,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796842681734186,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825452817113776,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883165288912622,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893337582287035,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885843411872262,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900460384393993,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941326967979732,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950156533404401,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967642920581918,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970012840471769,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976286670487178,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991178936079929,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.self_attn": [
+ {
+ "accuracy": 0.9649442151973122,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9657493202309859,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9683400706241005,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9751609877536171,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810977829130072,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814999652536291,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862737200762096,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867481165810635,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888588919451362,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897303706721255,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906294039989773,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916499186503259,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916685231422123,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928952306509018,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953117370605469,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996079836628939,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964774627434579,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973297754400655,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989897047022456,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.13.mlp": [
+ {
+ "accuracy": 0.9543835740340383,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9557291426156697,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9626167165605646,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9646674582832738,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9768795559280797,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788646243120495,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818145648429268,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987860379250426,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9889418451409591,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881449835865121,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896877917804217,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939144183146326,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948229672093141,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966378792336112,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968708214399061,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997513512639623,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990719295057812,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.self_attn": [
+ {
+ "accuracy": 0.9624936517916227,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9632490685111598,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662240680895353,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9715153920023065,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821175854457053,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817884721254048,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864363670349121,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860326067397469,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859358119337183,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869677793038519,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909170591517499,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912727349682858,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915191037090201,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917711780259484,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952943756392127,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960359609440753,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996225188437261,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9976868354960492,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990227443999365,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.14.mlp": [
+ {
+ "accuracy": 0.9523999691009521,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9539323605989155,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9612411229233992,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96346903788416,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9758435628916088,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9779031841378463,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810159927920291,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872376934478158,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9883346628201636,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875525330242357,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891618945096669,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9936125102011781,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945664762666351,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996434503087872,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967226246862035,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973955505380505,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990169444170437,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.self_attn": [
+ {
+ "accuracy": 0.9558553570195248,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9564712894590277,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9589930113993193,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647484139392251,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980907385286532,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811621920058602,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860415662589826,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864080407117543,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866700062626287,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868427773839549,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896676359992278,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905242080751219,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905491596774051,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914333059599525,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948028741698516,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951455177445161,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957532937589445,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970675432368329,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988242955388207,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.15.mlp": [
+ {
+ "accuracy": 0.9508339794058549,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9524242030946832,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9603233964819657,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9627782169141268,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9749717586918881,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771170365182977,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804927330268057,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867195785045624,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98789150777616,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871616630177749,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888097424256174,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934219647394983,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943968320363447,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963407771367776,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966385160621843,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9973781030429038,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990163267050919,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.self_attn": [
+ {
+ "accuracy": 0.9564717662961859,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.957555817930322,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9594237929896304,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9655819033321581,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789362016477083,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9792717679550773,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851587524539546,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855194578045293,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870413486894808,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876254655812916,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894808820988003,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990108858597906,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9904681036346837,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989272980313552,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994770321798952,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956939636092437,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959171912387798,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974655773686735,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988839515533886,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.16.mlp": [
+ {
+ "accuracy": 0.9474401191661233,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.949175753091511,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.957742336549257,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604784720822385,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9731724732800534,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754617276944613,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791708023924577,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856867256917452,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869645151652788,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9862411618232727,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879922231561259,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929493116705042,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939890527411511,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960543438792229,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964046497878275,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997229855899748,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989372817309279,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.self_attn": [
+ {
+ "accuracy": 0.9552690606368215,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.956904715613315,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9607015659934596,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9695559018536618,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771807962342313,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774324846895117,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9849057103458204,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851100334995672,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860568219109586,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867441434609262,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880418808836686,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891029086552168,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896712028666547,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906756007357648,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994230575075275,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952839491398711,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956789530421558,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9972750328873333,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988815295264909,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.17.mlp": [
+ {
+ "accuracy": 0.9427923026837801,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9447937074460482,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9541631748801783,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9572407101330004,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.970857256337216,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9733798190167076,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774117422731299,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843159490510037,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985775605628365,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850628297579916,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9869919121265411,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923548737638875,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9934908786886617,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956788452048051,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996116026843849,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970171400590947,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988352452453814,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.self_attn": [
+ {
+ "accuracy": 0.95518454125053,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9571306642733122,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9610939810150548,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694613030082301,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9764336441692553,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9771983748988101,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831582982289163,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840818769053409,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9867728430973856,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9876987596875743,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9880945337446112,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893039457107845,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892073861862484,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907417579701072,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938954365880865,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954509017498869,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951256478303357,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974113334166376,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986805883481314,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.18.mlp": [
+ {
+ "accuracy": 0.9404248312899941,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9424903267308286,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9522932265934191,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9555276538196363,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697347597071999,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9723209236797533,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9765640465836776,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837511338685688,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852234391789687,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844931222890553,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986489928082416,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920621530005806,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932317827877245,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955156433739161,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959552735090256,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968840799441463,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998757928609848,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.self_attn": [
+ {
+ "accuracy": 0.9552436031793293,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565852786365309,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9604628117460954,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9681602779187655,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9752807146624515,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9773340287961458,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809992329070443,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834936449402257,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986386587745265,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872396831449709,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9878293534642771,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895233033519042,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988981083035469,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909488503870211,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931724573436537,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952411263396865,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942041204163903,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971756076342181,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998496691451261,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.19.mlp": [
+ {
+ "accuracy": 0.9398426633132131,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9418570430655229,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9516242585684124,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9548439289394178,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694770982390956,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720623681419774,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.976302948437239,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836770939199548,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851189836075431,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843867358408476,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863922831259275,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920130254406678,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931989445498115,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954893204726671,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959442090047034,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996842589817549,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987699331617669,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.self_attn": [
+ {
+ "accuracy": 0.9583444971787303,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9595722712968525,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635420504369234,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705477388281571,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9780600666999817,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794439375400543,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.983158108435179,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9848558008670807,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877567275574333,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9872007275882521,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896580976875204,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908480424630014,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906042673085865,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916768968105316,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942757604937804,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958050458839065,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952647372295982,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9975560599643933,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998774887307694,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.20.mlp": [
+ {
+ "accuracy": 0.9396625129800094,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9416064839614064,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9514080976185045,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9546490813556471,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9694259637280515,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9719535363347906,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9762530326843262,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837130731657932,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851842506935722,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843906697474027,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.986358631598322,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920285508820885,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931832700967789,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955552655615305,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959463747708421,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968394670439394,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987790092433754,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.self_attn": [
+ {
+ "accuracy": 0.9631492150457281,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9644362800999692,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9676912646544606,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9751267699818862,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809741158234445,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816323393269589,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864696515233893,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987260913378314,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890028159869345,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900338806604084,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990526703627486,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913280370988344,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912504726334622,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927341091005426,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951960522877542,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962384889000341,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996278313429732,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977626290760542,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990059546145954,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.21.mlp": [
+ {
+ "accuracy": 0.9375247014196295,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9394493542219463,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9493950762246784,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9526702165603638,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9682717856607939,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708593424997831,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9752453455799505,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831629709193581,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846791938731545,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838147602583233,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858506666986566,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917478749626562,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929328254963222,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954230338335037,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958093954544318,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967231381880609,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987529590725899,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.self_attn": [
+ {
+ "accuracy": 0.9605547126970793,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621654435207969,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9660831595721998,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9729549665200082,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789493648629439,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.979860434406682,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9840163212073477,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851345325771131,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873905189727482,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9881163647300318,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989553595844068,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906485166988874,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990364775845879,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918101390725688,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942064704863649,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958845102473309,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995071320157302,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977272872469927,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987160097807646,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.22.mlp": [
+ {
+ "accuracy": 0.9338455200195312,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.935891634539554,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9463231563568115,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9497667394186321,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662687339280781,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691068875162225,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.973728253653175,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820577863015627,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9837072648500141,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827222573129755,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984964381707342,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911558008507678,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924851475577605,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950627649300977,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954929591009491,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964537589173568,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986324404415331,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.self_attn": [
+ {
+ "accuracy": 0.9640370983826487,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9653179896505255,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690773926283184,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9757371215443862,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807618181956442,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819216963491941,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9858754763477727,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987268696490087,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9888724025927091,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897661954164505,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906557727801172,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9917079422034716,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915723212455448,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926917976454684,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950239356411131,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962930993029946,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959984570741653,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979628250002861,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988997523721895,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.23.mlp": [
+ {
+ "accuracy": 0.9309959035170705,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9331316947937012,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9438698981937609,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9473904559486791,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647668317744607,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677473463510212,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9725314880672254,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812204084898296,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829375445842743,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819649944179937,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842962277562994,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907641873547905,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921399896082125,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948227252615126,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952985334553217,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962918205480826,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985778481748543,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.self_attn": [
+ {
+ "accuracy": 0.9600111120625546,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9612005070636147,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964541560725162,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9708996415138245,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9782786526178059,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797442618169283,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9834015651753074,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852686540076607,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868073243843881,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9877730953065973,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899090991208428,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909870146136535,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9912349362122385,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920229943175065,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949882155970523,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956769892259648,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960148891336039,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9974033789415109,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998908018399226,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.24.mlp": [
+ {
+ "accuracy": 0.9309274334656565,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9329412987357691,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9434307499935752,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9468039964374743,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.964802716907702,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9676379341828196,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9723371110464397,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814152843073795,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830747732990667,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820525065848702,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842901574937921,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908649152831027,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992164491822845,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949612405739332,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953612912642328,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963140507277689,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986349122696802,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.self_attn": [
+ {
+ "accuracy": 0.9629685784641066,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.966292845575433,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9706497600204066,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9770395802824121,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9812664436666589,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825177678936406,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9859766771918849,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9875544631167462,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890076141608389,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9899718353622838,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913101808020943,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920592260988135,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9920864560102162,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9931446266801733,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995470314825836,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964580088853836,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963522063274133,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980500654170388,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990420024842024,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.25.mlp": [
+ {
+ "accuracy": 0.9309582647524381,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9330058160581087,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9429548539613423,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9461247419056139,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648494093041671,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9677643054410031,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.972257701974166,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9814389065692299,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9831683055350655,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9820814869905773,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843341614070692,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990873825393225,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921791835835105,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949730893498973,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953598799674135,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962574216096025,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986128354150998,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.self_attn": [
+ {
+ "accuracy": 0.9692539509974027,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9705400811998468,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9734085955117878,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9785356945113132,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98225956841519,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9844387628530201,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9863943786997544,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892021245078036,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903048386699275,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911255232597652,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918489307165146,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9925915912577981,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926510790460988,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935304832301641,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958161197994885,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9968136386259606,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996640493995265,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983592800992099,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990582437695641,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.26.mlp": [
+ {
+ "accuracy": 0.9307880589836522,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9327648313421952,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9425337816539564,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9456456962384676,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9646417467217696,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9674692153930664,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9718593108026605,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.981447066131391,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830925182292336,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819755052265368,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9842149834883841,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908335373589867,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921296811417529,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949831903765076,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953331421864661,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962033327472838,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986436717016133,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.self_attn": [
+ {
+ "accuracy": 0.970002227707913,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9720134829220018,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9753984056021038,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9796412493053236,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9836482029212149,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852937397203947,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874603458141026,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9895802375517393,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908004010978498,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910707693350943,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9924462923878118,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9930942176203978,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9932637881291541,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939152543482027,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960527443572095,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9970473327526921,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9967361741552228,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984502204154667,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9991283381455823,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.27.mlp": [
+ {
+ "accuracy": 0.9293055346137599,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9312632648568404,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9409142418911582,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9439750847063566,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9638123637751529,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9667457373518693,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710822230891177,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9810523924074674,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9827425150494826,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815605721975628,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9838636172445197,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906106544168372,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919505621257582,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948667911322493,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952188992970868,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960712065037928,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986038375645876,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.self_attn": [
+ {
+ "accuracy": 0.963465646693581,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654019349499753,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691075588527479,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750660861793318,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808185367207778,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9818178995659477,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9855265554628874,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9868177206892716,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884591361409739,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891816479595084,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907054908965763,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915861456017745,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915831520369178,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923868547928961,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949667892957988,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961160746844191,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.995863150609167,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9978448852504554,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9988796908016268,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.28.mlp": [
+ {
+ "accuracy": 0.9270161453046297,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9290557974263242,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9388320006822285,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9419407468093068,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9626486834726835,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656501130053872,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700630087601511,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803985106317621,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821768795189104,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809739338724237,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.98332973530418,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9903244619306765,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916867558893404,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947033418636573,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950806416179004,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9959546653063673,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985625606618429,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.self_attn": [
+ {
+ "accuracy": 0.9652475400974876,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684306320391203,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9719184825294896,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9775490556892595,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9821797123080805,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9835380284409774,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9864399715473777,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988123326709396,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989051900411907,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9901048784193239,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914546067777433,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9923114141351298,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992552460808503,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933685495665199,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954900094553044,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966395211062933,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962165598806582,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980227398244959,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.999000453537232,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.29.mlp": [
+ {
+ "accuracy": 0.9267352254767167,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9287286808616236,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9383278395000257,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9413716102901258,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96244919613788,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9654836340954429,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9697915754820171,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.980351480998491,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.982123221221723,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808852390239113,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832572701730227,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990283414721489,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916586938657259,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946969441677395,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950463650257964,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9958899562296114,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985566190198848,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.self_attn": [
+ {
+ "accuracy": 0.9681929381270158,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9723133444786072,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760147019436485,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9799939535166088,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.984251657598897,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852431118488312,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9873773573260558,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892137332966453,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890589133689278,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893320587120558,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9922471720921365,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929647829971815,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927807779688584,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993865115077872,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957656640755502,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969548251675932,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964527750485822,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982881075457523,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990814982080146,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.30.mlp": [
+ {
+ "accuracy": 0.9251628863184076,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9272636300639102,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9367078166258962,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9397539276825755,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9616612729273344,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9647756846327531,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9690155763375131,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9798959760289443,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816839193042955,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804812293303641,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829060482351404,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900787566837511,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9914813276968504,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945701388152022,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949461158953214,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957820879001367,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985090780415034,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.self_attn": [
+ {
+ "accuracy": 0.9646941517528734,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662828131725913,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9701378063151711,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760073156733262,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811246599021711,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9823018783017209,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9860062850149054,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9874923582139769,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891222911445718,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989570215344429,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9910432688499752,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9916869049009523,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9919693422944922,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9927399699625216,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953438882765017,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9963776771175233,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962867400363872,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9980214195031869,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989992086040346,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.31.mlp": [
+ {
+ "accuracy": 0.9239390272843211,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9260848321412739,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9356427318171451,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9388030767440796,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.960991200647856,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9641463662448683,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9684760350930064,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9794804751873016,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813288967860373,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801216925445356,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825952664801949,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898917329938788,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913212335423419,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994453098036741,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948494226524704,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957109646577584,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.998477683255547,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.self_attn": [
+ {
+ "accuracy": 0.9691472806428608,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9704269139390242,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9745114978991056,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9788581258372256,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9830357150027627,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9843127382429022,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871108924087725,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887701994494388,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905001105446565,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907136347733045,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918244888907984,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928063938492223,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9928269127481862,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938339708667052,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9957846763886904,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9969022770069147,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9964404749242883,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982616964139437,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989187480195573,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.32.mlp": [
+ {
+ "accuracy": 0.9225679071326005,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9248171856528834,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9343761456640143,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9375901347712466,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9602900492517572,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9635214617377833,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9678738179959749,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9790268665865848,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9809278092886272,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797725677490234,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822836941794345,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9897120987114153,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9911672206301438,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943348467350006,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9947662369201058,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9956333758799654,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9984322247144423,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.self_attn": [
+ {
+ "accuracy": 0.9628744846896121,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9642276575690821,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9691251516342163,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9752921085608633,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9808087741073809,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9811972803191135,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866737393956435,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987180832969515,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885677310981249,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9890315650325072,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9905902202192106,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9913237800723628,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918064224092584,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992418519760433,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954517146474436,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9962600987208518,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9965136360965277,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979699426575711,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9990482741084538,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.33.mlp": [
+ {
+ "accuracy": 0.9194375025598627,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9218418096241198,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9319344068828382,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9353051875766955,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9587234509618658,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9621030437318903,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9666866660118103,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9782008324798784,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9801453900964636,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9789833222564898,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9815922144212221,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893161756427664,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908266318471808,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941011373149721,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945640536515337,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954751780942867,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983758304856325,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.self_attn": [
+ {
+ "accuracy": 0.9539086536357277,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9566202006841961,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9603717672197443,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669633256761652,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9759738696248907,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9772506795431438,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9816622843867854,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9833513388508245,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856140503757879,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985951155424118,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884828846705588,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9894061747350191,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893442502147273,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900345316058711,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9938510879874229,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953767182795625,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9948945014100326,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997308793624765,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985867582849766,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.34.mlp": [
+ {
+ "accuracy": 0.9202155502218949,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9225500445616872,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9324952050259239,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9358495662086889,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9590592917643095,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9624388751230741,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9669558907810011,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9783489170827364,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9803025502907602,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9791136478122912,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817421044173994,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893667384197837,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908938407897949,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941346762995971,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9945776309621962,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9954886087461522,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.99837915942465,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.self_attn": [
+ {
+ "accuracy": 0.9619252869957372,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651781728393153,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9700885258222881,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9756035114589491,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9807395699777102,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9817744540540796,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.985395400147689,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9866739135039481,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9884347037265175,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989096075296402,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.990826353430748,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9915119660528082,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9918387336166281,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.992639947878687,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951361213859758,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9961876292762003,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9960020392348892,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9977982052062687,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9989078600743884,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.35.mlp": [
+ {
+ "accuracy": 0.9184076472332603,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9209782261597483,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9308560145528693,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9343051722175196,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9581821623601412,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9616382059298063,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9661648430322346,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9777272892625708,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9797387154478776,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9786785966471622,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9813397448313864,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9891442711416044,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9906904493507586,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9939586084924246,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9944715656732258,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9953966823063398,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9983124768263415,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.self_attn": [
+ {
+ "accuracy": 0.9529309774699964,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563378628931547,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9616725695760626,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9689355963154843,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9767914335978659,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9774453985063654,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9832302080957513,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9839135235861728,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9850896094974718,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861610876886469,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882982718317133,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893639064148853,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893980332111058,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9908717721700668,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937737411574313,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951376483628624,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9949005669669101,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996732604934981,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9986592142009422,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.36.mlp": [
+ {
+ "accuracy": 0.9151029963242381,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9177036661850779,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9283214242834794,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.93205221703178,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9563468506461695,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9599899304540533,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9648639591116654,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9766785282837717,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.978811285997692,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9776524195545598,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9804815744098864,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9885846203879306,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9902452904927103,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993651128521091,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9941737247925055,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9952088265042556,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9982351009782992,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.self_attn": [
+ {
+ "accuracy": 0.9444580203608463,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9479021210419505,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9545166398349562,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9651918411254883,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9721917039469669,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9728113444227922,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9822569975727483,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9829630334126321,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846781633402172,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9856586550411425,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9861527555867245,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9871658957318256,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9882368359126543,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9892345611986361,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9933780596444481,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994575680478623,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9951764827496127,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.996942840908703,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9987066751836162,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.37.mlp": [
+ {
+ "accuracy": 0.9073905191923443,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9107915664974012,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.922188727479232,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9260959248793752,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9522699619594374,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9565829540553846,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9618473837250158,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9745053884230162,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9767252175431502,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754874376874221,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9787243808570661,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.987427000936709,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.989338924226008,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9929013550281525,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9935392282511059,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9946325037040209,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9979976007812902,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.self_attn": [
+ {
+ "accuracy": 0.94476916915492,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.953280646550028,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9585668381891752,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9656539527993453,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9750054462959892,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9754936020625266,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9819665551185608,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9824950475441782,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9847359123982882,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853802448824832,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870157045753378,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.988417901490864,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887010651199442,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9898977428674698,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.993297132222276,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.994746764239512,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9942980134173444,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9971190206706524,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985733363581332,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.38.mlp": [
+ {
+ "accuracy": 0.8956377067063984,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9004529902809545,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9113116389826724,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.915102757905659,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9467076000414396,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9521799997279519,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9571061385305304,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9710041190448561,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9732404476717899,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9722915072190134,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9760805164512835,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9853322207927704,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9879535460158398,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909730404615402,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9926855995466835,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9937572400820883,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997547815504827,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.self_attn": [
+ {
+ "accuracy": 0.9524514361431724,
+ "total_bits": 111655168,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9543147369434959,
+ "total_bits": 114997504,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9602508231213218,
+ "total_bits": 119288192,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.96959220108233,
+ "total_bits": 139930496,
+ "q_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9778528746805693,
+ "total_bits": 165321856,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9784758577221319,
+ "total_bits": 165487616,
+ "q_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64,
+ "3": 64
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9845520853996277,
+ "total_bits": 211983488,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9852710444676248,
+ "total_bits": 212149248,
+ "q_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9846528768539429,
+ "total_bits": 213960704,
+ "q_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 64
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9870804077700565,
+ "total_bits": 216920576,
+ "q_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9887741764909342,
+ "total_bits": 217916416,
+ "q_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9893108967103457,
+ "total_bits": 219400192,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9900627042117872,
+ "total_bits": 223787264,
+ "q_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 64
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 64,
+ "4": 64
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9907222405860299,
+ "total_bits": 226914816,
+ "q_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "5": 32
+ },
+ "bits": [
+ 5
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9943688245196092,
+ "total_bits": 274898048,
+ "q_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9950569052445261,
+ "total_bits": 279343616,
+ "q_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9955095668372355,
+ "total_bits": 316841088,
+ "q_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.997169871863566,
+ "total_bits": 332263936,
+ "q_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 32
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "6": 32
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9985809284997614,
+ "total_bits": 421698688,
+ "q_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "k_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "v_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "o_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.layers.39.mlp": [
+ {
+ "accuracy": 0.8709318135914049,
+ "total_bits": 492374592,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8768085680509868,
+ "total_bits": 510724672,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "6": 32,
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 6,
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.2,
+ 0.75
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8875402651335064,
+ "total_bits": 569864704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "3": 32
+ },
+ "bits": [
+ 5,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.8924545865309865,
+ "total_bits": 639496704,
+ "gate_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "3": 64,
+ "2": 64
+ },
+ "bits": [
+ 3,
+ 2
+ ],
+ "bits_prop": [
+ 0.3,
+ 0.7
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9342471486643741,
+ "total_bits": 721045344,
+ "gate_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128,
+ "3": 128
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9403658917075709,
+ "total_bits": 740855808,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 8,
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9450874077646356,
+ "total_bits": 796587104,
+ "gate_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32,
+ "3": 32
+ },
+ "bits": [
+ 4,
+ 3
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.963830756513696,
+ "total_bits": 910810592,
+ "gate_proj": {
+ "group_size": {
+ "4": 128
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9670537553335491,
+ "total_bits": 924225536,
+ "gate_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "4": 32
+ },
+ "bits": [
+ 4
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9662234061642697,
+ "total_bits": 937477984,
+ "gate_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 128,
+ "4": 128
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9707195256885729,
+ "total_bits": 957288448,
+ "gate_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "5": 32,
+ "4": 32
+ },
+ "bits": [
+ 8,
+ 5,
+ 4
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9825706089797773,
+ "total_bits": 1153910624,
+ "gate_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128,
+ "5": 128
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9851871995549453,
+ "total_bits": 1173721088,
+ "gate_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.25,
+ 0.75
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 32,
+ "5": 32
+ },
+ "bits": [
+ 8,
+ 6,
+ 5
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.1,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9896224196019926,
+ "total_bits": 1336788832,
+ "gate_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "6": 128
+ },
+ "bits": [
+ 6
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 32,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.05,
+ 0.95
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9909934087803489,
+ "total_bits": 1380525056,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.15,
+ 0.85
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9921991817261043,
+ "total_bits": 1505043456,
+ "gate_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128,
+ "6": 128
+ },
+ "bits": [
+ 8,
+ 6
+ ],
+ "bits_prop": [
+ 0.1,
+ 0.9
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ },
+ {
+ "accuracy": 0.9966148887025682,
+ "total_bits": 1769284608,
+ "gate_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "up_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ },
+ "down_proj": {
+ "group_size": {
+ "8": 128
+ },
+ "bits": [
+ 8
+ ],
+ "bits_prop": [
+ 1
+ ],
+ "scale_bits": 4
+ }
+ }
+ ],
+ "model.norm.norm": null,
+ "lm_head.linear": null
+ },
+ "last_module_idx": 82
+}
\ No newline at end of file