{ "measurement": [ { "key": "model.layers.0.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.01589808240532875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.015738019719719887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.007503035943955183, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.007460711058229208, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.007413501851260662, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.004650091752409935, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.02256692573428154, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.015587257221341133, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.007431911304593086, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.00738942576572299, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.007877077907323837, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.008847747929394245, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.007376484572887421, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005248768720775843, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.004625541158020496, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.005469747819006443, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.004617737140506506, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.004417936783283949, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.004616306629031897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0044160992838442326, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004699076525866985, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004615591838955879, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004444971214979887, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004414624534547329, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.016907159239053726, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.016653327271342278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.008009353652596474, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.007937832735478878, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.007860241457819939, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.003796379780396819, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0205422043800354, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0164569690823555, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.007898373529314995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.007812931202352047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.008084569126367569, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.008617378771305084, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.007792370393872261, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0047897836193442345, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0037375367246568203, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004778372589498758, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.003713395446538925, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.003310716012492776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0037092785350978374, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0033047909382730722, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00346372090280056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0037074615247547626, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.002942040329799056, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003300972282886505, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.061544571071863174, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.05136384069919586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.04205751419067383, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.035500667989254, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.027456603944301605, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.02043856307864189, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04791000857949257, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03791726008057594, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.029165245592594147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.022427693009376526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.022575244307518005, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02542617730796337, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018486347049474716, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.013602984137833118, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012188377790153027, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012878838926553726, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0076453485526144505, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007001922000199556, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.006758994422852993, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005897275172173977, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006691007409244776, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006127041298896074, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004142667166888714, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004511406645178795, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.019689233973622322, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.011421140283346176, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.006516108755022287, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.008056389167904854, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.007750033866614103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.003000957425683737, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.012133406475186348, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.010852067731320858, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.009205888025462627, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.005471847951412201, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.005842262879014015, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.006189314182847738, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.005356536712497473, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0040544746443629265, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.003693552687764168, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.003223685547709465, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0026210250798612833, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.002522526541724801, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.002314739627763629, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.002114908304065466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0019709919579327106, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0023013376630842686, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0016143402317538857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.001984895206987858, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.06395174562931061, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.0600140243768692, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.05872626602649689, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.053398292511701584, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.028660131618380547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.02744341641664505, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.03191111236810684, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0295499786734581, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.029022101312875748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.02613859996199608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.024974040687084198, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.016221607103943825, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.01424651499837637, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.01386349182575941, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.013770747929811478, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.008179164491593838, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.007603905163705349, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.007572716102004051, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0071570309810340405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.007104172371327877, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.004612748045474291, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.005233668722212315, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.004486100282520056, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.004181180149316788, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.09577932208776474, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.08987627178430557, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.08802971988916397, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.08013403415679932, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.04285908862948418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.04108980670571327, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.047661371529102325, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0440841019153595, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.04340611398220062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.03907637298107147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.03735220059752464, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.02404138073325157, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.020924776792526245, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.020398858934640884, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.020273111760616302, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.012010438367724419, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.010626543313264847, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.010587913915514946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.009905118495225906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.009830408729612827, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.006415458861738443, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.006629429291933775, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.006228921003639698, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.004666161723434925, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.0.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.058144379407167435, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.048755962401628494, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.032807838171720505, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.02982608787715435, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.02454560622572899, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.016132667660713196, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.046447623521089554, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.04139646515250206, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.026115095242857933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.02130313403904438, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.021143661811947823, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.020382994785904884, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.01919155940413475, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.013476896099746227, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.011543233878910542, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.011790608055889606, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.009113701060414314, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.00797137338668108, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.008687264285981655, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.007403274066746235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.008274715393781662, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.00843406654894352, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.006750178057700396, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.006859478075057268, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.01506334450095892, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.01197891030460596, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.009205611422657967, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.008089311420917511, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.006488214246928692, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.00435257563367486, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.011717337183654308, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.009367273189127445, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.007040437776595354, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.005327135790139437, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.005652622785419226, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0062243761494755745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0046323915012180805, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0032449066638946533, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0027957146521657705, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.003211063565686345, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0018378093373030424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0016330723883584142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0016415053978562355, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.001368207624182105, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0017173943342640996, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0015320570673793554, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0010694540105760098, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.001101456000469625, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.013861903920769691, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.010819146409630775, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.008353037759661674, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.007321909070014954, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.005826502572745085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.00398506224155426, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.009444487281143665, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.008353685960173607, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.006386568769812584, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.004681543912738562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.004761019255965948, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.004804341122508049, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.004008442163467407, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0028802661690860987, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.002550592413172126, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0024172167759388685, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.001626314828172326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.001486513763666153, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.001428251271136105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00122502027079463, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0013075373135507107, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.001321829273365438, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0009145527146756649, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0009686561534181237, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08256427198648453, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06958182156085968, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06402995437383652, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05498407036066055, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.036972083151340485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.031523510813713074, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04598162695765495, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04215122386813164, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03885921835899353, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02984389290213585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.027909625321626663, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.023481842130422592, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02026517130434513, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.017881978303194046, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.017278721556067467, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011777224950492382, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009456731379032135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009236795827746391, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008077519945800304, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007681692950427532, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0062309755012393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006387303117662668, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005402806680649519, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00447050528600812, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13301411271095276, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08464869111776352, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06414999812841415, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.059372734278440475, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05779210850596428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.034725431352853775, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07685241103172302, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06826530396938324, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06314961612224579, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0353289358317852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03679342567920685, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03952633962035179, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03373377025127411, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029121506959199905, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02792181819677353, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020385924726724625, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01710251159965992, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.016796959564089775, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013594652526080608, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012851766310632229, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012059873901307583, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013389134779572487, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01068966556340456, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011197968386113644, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.09589787572622299, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.09102068841457367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.08956883102655411, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.08192096650600433, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.04267232492566109, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.04130198433995247, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.04715731739997864, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.04350990802049637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.04308196157217026, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.03942660987377167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.03783303499221802, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.023876186460256577, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.020798107609152794, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.020440760999917984, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.02035444974899292, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.011989639140665531, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.010953878052532673, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.010929320938885212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.010365542024374008, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.010316668078303337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.006662577390670776, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0072233835235238075, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.006546929012984037, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.005550861824303865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.12884408235549927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.12264963239431381, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.12082388252019882, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.11096508055925369, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.05759797990322113, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.055816374719142914, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.06339683383703232, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.05864197760820389, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.05810606852173805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.05336057394742966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.05117559805512428, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.03187095746397972, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.027693552896380424, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.027242831885814667, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.027136676013469696, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.015869636088609695, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.013870649971067905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.013839388266205788, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01303633488714695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.012968668714165688, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.008262597024440765, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.008149310946464539, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.008109505288302898, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.005200968589633703, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.1.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.1175626590847969, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.10571214556694031, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.09480548650026321, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.08603975176811218, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.054339755326509476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.04389587417244911, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.07444056868553162, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.06727191805839539, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.05583367496728897, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.048819541931152344, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.047465283423662186, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.03899756073951721, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.03541440889239311, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.025496546179056168, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.023489076644182205, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.01852707378566265, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.01390520017594099, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.012929562479257584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.012886869721114635, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.011728787794709206, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.010830172337591648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01049125287681818, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.008840062655508518, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.007542859762907028, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.03173770383000374, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.029071848839521408, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0278706643730402, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.02484150603413582, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.014394987374544144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.013290390372276306, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.017495421692728996, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.01556132361292839, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.014683831483125687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.012682502157986164, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01206846721470356, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.008948145434260368, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.007475399412214756, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.006914439145475626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.006774048786610365, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004526088945567608, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0036324486136436462, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.003568030660971999, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0033008845057338476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.003211353439837694, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0024261376820504665, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0023662419989705086, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0021586306393146515, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.001655321684665978, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.02502710185945034, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.022810908034443855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0218617245554924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.019478244706988335, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.011319913901388645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.010437613353133202, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.013365224003791809, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.012198277749121189, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.011560154147446156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.00991479866206646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.00936425942927599, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.006745975464582443, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.005814775824546814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.005424342583864927, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.005332685075700283, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0033846262376755476, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0028308029286563396, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0027926182374358177, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0025547053664922714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00249558687210083, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.001798492856323719, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0018091913079842925, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0016692191129550338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00124493264593184, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12804390490055084, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11901438981294632, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1160898208618164, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10452932864427567, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058905914425849915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0558064840734005, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06607893109321594, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06102341413497925, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05989040434360504, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05258475989103317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04969862475991249, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.033491626381874084, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029060009866952896, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028100788593292236, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027869541198015213, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01671348139643669, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014323156327009201, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01424939651042223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013032475486397743, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012886228039860725, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008623737841844559, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008492001332342625, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008266176097095013, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005335894413292408, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1619553565979004, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13164253532886505, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12129480391740799, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0956224575638771, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07270348072052002, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.061710622161626816, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0883617103099823, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07985790073871613, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07625700533390045, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.054420746862888336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04828181490302086, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.045533958822488785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.039070434868335724, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03590807318687439, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03514108806848526, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.023382334038615227, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02029845304787159, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02009110152721405, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0171658955514431, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016679903492331505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013773985207080841, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014887056313455105, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012844116427004337, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012168381363153458, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15614163875579834, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14845094084739685, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1461970955133438, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13435830175876617, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07148832082748413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06913221627473831, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.078983373939991, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07279261201620102, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07218316197395325, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06599520891904831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06364012509584427, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04019126668572426, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03476698696613312, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.034204963594675064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03406846895813942, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020168909803032875, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017934005707502365, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01789405569434166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016877299174666405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01680079475045204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01102457195520401, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011284573003649712, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010845827870070934, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008149647153913975, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1864161342382431, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17739902436733246, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17472732067108154, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1608077734708786, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08544597029685974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08267241716384888, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09437350183725357, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0870075672864914, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08627035468816757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07892817258834839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07605167478322983, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.047753892838954926, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04132885858416557, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04065044969320297, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04049324616789818, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023855624720454216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020777372643351555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020730536431074142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019472453743219376, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019372092559933662, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012608866207301617, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012298150919377804, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012392476201057434, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007968016900122166, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.2.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.17954760789871216, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.16341368854045868, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.15713737905025482, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1420702487230301, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.0810464397072792, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.07510972768068314, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.0942901223897934, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.0867195576429367, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.08323374390602112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.07185380905866623, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.06879085302352905, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.04786218702793121, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0414089560508728, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.03887508437037468, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.03826503828167915, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.024013962596654892, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.020389769226312637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0201723650097847, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.018616409972310066, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.01821747049689293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.012997063808143139, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.013142043724656105, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0121947405859828, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.009255553595721722, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.04493671655654907, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.04115230590105057, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.03959603235125542, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.03570156171917915, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02067604288458824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.019405202940106392, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.024556651711463928, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.021952899172902107, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.021035093814134598, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.01815122738480568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01732257939875126, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.012604700401425362, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01055053062736988, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.009997710585594177, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.009864185005426407, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006381293758749962, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005230492912232876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.005174452904611826, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.004689077381044626, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.004604176618158817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.003411108162254095, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.003306574421003461, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.003236405784264207, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0022850055247545242, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.03509616479277611, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.03237925469875336, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.031118249520659447, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.027913391590118408, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.016054686158895493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.014968864619731903, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.018955666571855545, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.017286736518144608, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.01634662039577961, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.014248663559556007, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01358751580119133, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.009607474319636822, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.008253959938883781, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.007720396388322115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.007593564223498106, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.004820787347853184, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.004054326564073563, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.00399902556091547, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.003701473120599985, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00362150720320642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0025726472958922386, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.002622108208015561, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.002391028916463256, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0018514128169044852, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.16509829461574554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1542235165834427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15061523020267487, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1361851990222931, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07649979740381241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07265672832727432, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08622736483812332, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07906026393175125, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07770704478025436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06860486418008804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06519706547260284, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.043730489909648895, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03769558668136597, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03653869405388832, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.036256007850170135, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.021839817985892296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018612226471304893, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.018524732440710068, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016986360773444176, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.016813691705465317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011331452056765556, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01099339034408331, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010886246338486671, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006890960969030857, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12344516813755035, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10650678724050522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0963781327009201, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08004307001829147, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05632775276899338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04782517999410629, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07987351715564728, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06757057458162308, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05849836766719818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046138208359479904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0431450679898262, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.040884725749492645, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.033837445080280304, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028842153027653694, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027501985430717468, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02177630364894867, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01761387474834919, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01708044484257698, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016031237319111824, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01529565081000328, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013345223851501942, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014370612800121307, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011618898250162601, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012349715456366539, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1687881052494049, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15994007885456085, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15727262198925018, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14428092539310455, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07872416824102402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07583344727754593, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08696135133504868, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08036269247531891, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07958508282899857, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07217877358198166, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06927557289600372, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04455867037177086, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03881794586777687, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03808993101119995, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.037913091480731964, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02235601097345352, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020441344007849693, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020390959456562996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019208785146474838, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019106624647974968, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012366103939712048, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013494830578565598, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012139726430177689, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010363709181547165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20753711462020874, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1967647224664688, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19349762797355652, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17746502161026, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09653526544570923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09301444888114929, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10668913275003433, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09855174273252487, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09761375188827515, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08850604295730591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08492840081453323, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0542733334004879, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.047065287828445435, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.046193480491638184, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.045993153005838394, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027115335687994957, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023907510563731194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0238471869379282, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022303318604826927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022173751145601273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014429249800741673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01460838969796896, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0141373872756958, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010006592608988285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.3.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.06366485357284546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.05972202867269516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.03704970329999924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.033944785594940186, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.027045466005802155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.015300089493393898, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.054839178919792175, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.05193417891860008, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.027629543095827103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.02519320882856846, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.024873286485671997, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.023695314303040504, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.022466560825705528, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0082768015563488, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.008876374922692776, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.005574744660407305, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.005919251125305891, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.004453744273632765, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.005546292755752802, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.003914640750735998, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.004911405965685844, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.004966450855135918, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.002858330262824893, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.002525421790778637, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.058097224682569504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.05332371965050697, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.05138642340898514, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.046014152467250824, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.02656673640012741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.024735892191529274, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.030722500756382942, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.028314225375652313, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.027115393429994583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.023443032056093216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.022184954956173897, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015571124851703644, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.013507128693163395, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.012723473832011223, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012535963207483292, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.007779712788760662, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.006589310243725777, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006515993736684322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005970334634184837, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005850398447364569, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004071972798556089, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004122646525502205, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0038105547428131104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0027690534479916096, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.050570517778396606, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.04636377841234207, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.044611867517232895, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.03991467505693436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.023090215399861336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.021461447700858116, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.026959922164678574, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.024737250059843063, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.023584939539432526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.020372578874230385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.01934138499200344, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.013651297427713871, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.011790516786277294, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.011063976213335991, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.010888582095503807, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.006829210091382265, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.005717155989259481, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.005646560341119766, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005174758844077587, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00506453774869442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.003571483539417386, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0035701096057891846, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0033265589736402035, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.002371920272707939, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17293380200862885, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16091807186603546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15697622299194336, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1416974663734436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08029846847057343, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07602723687887192, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09013238549232483, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08319935202598572, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08163803070783615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07169501483440399, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06781632453203201, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.045731332153081894, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03965964913368225, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.038350753486156464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.038034532219171524, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.022816132754087448, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01952684298157692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.019427847117185593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.017753489315509796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01755356788635254, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011759868822991848, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.011541344225406647, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011279593221843243, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007200813386589289, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17842510342597961, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15635938942432404, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14719389379024506, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12652306258678436, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08310087025165558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07389968633651733, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10040596127510071, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09130856394767761, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08593308180570602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06805060803890228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06335537135601044, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05203484371304512, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.044728443026542664, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04095006734132767, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04000789299607277, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02644321694970131, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022727645933628082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022392338141798973, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020000595599412918, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019425027072429657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015065797604620457, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016299482434988022, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013911300338804722, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012940499000251293, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1800655722618103, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17022423446178436, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1672702133655548, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15285345911979675, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08391988277435303, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08059825748205185, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0925832986831665, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08572481572628021, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08489926904439926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07645591348409653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07293333113193512, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04703664779663086, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040927644819021225, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040118150413036346, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03991980105638504, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023493988439440727, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02066030725836754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020608501508831978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019165324047207832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019045958295464516, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01235266774892807, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012481557205319405, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012081664055585861, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0083705959841609, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22084037959575653, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2088194340467453, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20521867275238037, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1876402646303177, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10288819670677185, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09883542358875275, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1134614497423172, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10510034114122391, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10412301123142242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09378595650196075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08945047855377197, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05754408985376358, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05006873607635498, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04907459020614624, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.048839908093214035, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028687167912721634, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024933427572250366, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024877596646547318, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02305913157761097, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022914918139576912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014821409247815609, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014536495320498943, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014492448419332504, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009052659384906292, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.4.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21863843500614166, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.1981007158756256, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1904744952917099, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17012150585651398, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10023742914199829, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09259866923093796, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11581158638000488, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10667577385902405, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10286744683980942, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08747708797454834, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0825594887137413, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.058954957872629166, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05100255832076073, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04810174182057381, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04738948121666908, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.0295250304043293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025057781487703323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02481791190803051, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022548796609044075, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022089378908276558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.015810871496796608, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.015869425609707832, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01488030981272459, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.010936260223388672, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06746368855237961, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06192023307085037, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.059513553977012634, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05321000888943672, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.030889922752976418, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.028696171939373016, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03583667799830437, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.033133719116449356, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.031537991017103195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.027221297845244408, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.025698214769363403, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01815485768020153, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01582663506269455, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01483464427292347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01459390390664339, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009092849679291248, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.007735075429081917, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.00763988122344017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007012560963630676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0068612489849328995, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004790919367223978, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0049284230917692184, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004461470991373062, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0033967162016779184, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.05766705051064491, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.052879054099321365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.050749074667692184, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.04536837339401245, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.026428697630763054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.024488991126418114, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03104712814092636, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.028520267456769943, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.026991546154022217, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02328876405954361, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.022097395732998848, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.015741482377052307, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.013614491559565067, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.012686513364315033, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.012457996606826782, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.00787927582859993, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00658642640337348, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.006492042448371649, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.005965885706245899, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.005823390558362007, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004137141164392233, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004173942841589451, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0038277467247098684, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0028209458105266094, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18440066277980804, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17139343917369843, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16710707545280457, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15060371160507202, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08578219264745712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08110292255878448, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.096604123711586, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08901844173669815, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08720004558563232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07638411968946457, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07214481383562088, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.049032777547836304, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04244416207075119, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04098047316074371, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040635041892528534, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02447337843477726, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.020894495770335197, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02078319899737835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018955817446112633, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018736818805336952, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0126492353156209, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01237843744456768, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012105904519557953, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.007766778580844402, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.18900802731513977, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16629162430763245, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15785156190395355, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1311599314212799, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08798476308584213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07916240394115448, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10384863615036011, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09458821266889572, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09074044972658157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07143066823482513, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06392659991979599, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.053697846829891205, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04610753804445267, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.043126944452524185, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04240074381232262, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02728804387152195, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023634593933820724, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023401331156492233, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02058776654303074, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0201336070895195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015522245317697525, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01647883653640747, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014609995298087597, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012915389612317085, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17804473638534546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16774263978004456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16463468968868256, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15013080835342407, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08332807570695877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07979216426610947, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09217468649148941, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08529013395309448, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08437339216470718, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07553567737340927, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07193386554718018, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04697670415043831, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040833041071891785, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039941366761922836, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039736028760671616, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02346758544445038, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0207463800907135, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02068813517689705, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019195085391402245, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019066499546170235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012457084842026234, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01278753113001585, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012159676291048527, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008866610005497932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2319273054599762, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21868851780891418, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2147243767976761, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1957315355539322, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10843721777200699, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1038798987865448, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11986125260591507, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11097406595945358, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10982184112071991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09835304319858551, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09358564764261246, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06081484258174896, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.052906882017850876, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05177285894751549, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05149829760193825, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03034619614481926, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026302658021450043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026230815798044205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02422039955854416, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02404658868908882, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01568695530295372, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015338331460952759, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015296692959964275, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009517982602119446, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.5.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2265091985464096, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2052963376045227, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.197527214884758, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17610517144203186, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10412044078111649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09623710811138153, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11995387822389603, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11068414151668549, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10684677213430405, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09077324718236923, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08547316491603851, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06111210957169533, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05303529277443886, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.050068482756614685, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049344468861818314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030662434175610542, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026220547035336494, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02598547749221325, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023611338809132576, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023146148771047592, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016507240012288094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016772953793406487, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015563437715172768, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011783423833549023, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08490923792123795, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07877608388662338, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07631022483110428, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0686320886015892, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03931042179465294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0369391068816185, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04485601559281349, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04147682711482048, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.039991024881601334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03497514873743057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03306611627340317, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02277681976556778, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019850268959999084, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018880560994148254, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018645789474248886, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011402915231883526, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009828940033912659, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009735418483614922, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00897110253572464, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008820843882858753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006014061160385609, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00618319446220994, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0056889792904257774, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004264041781425476, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.06990548223257065, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06489100307226181, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06277943402528763, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05650925263762474, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03238431364297867, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.030393505468964577, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.037436820566654205, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.034380953758955, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03295261040329933, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.02885816991329193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02743702009320259, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.01901126839220524, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.016429008916020393, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.015536032617092133, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015322156250476837, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009509487077593803, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.008031537756323814, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007943828590214252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007324270438402891, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007188665680587292, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00498188566416502, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.004993846639990807, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004685099236667156, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0033328705467283726, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20362955331802368, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.190038800239563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18562956154346466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16766659915447235, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09504549950361252, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09017348289489746, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10652864724397659, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09833182394504547, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09655654430389404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08502964675426483, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08035115152597427, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.054076019674539566, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04688603803515434, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04542519897222519, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04507200047373772, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02697075717151165, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023088332265615463, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022977937012910843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021010952070355415, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02078811265528202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013869741931557655, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013577399775385857, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013324913568794727, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00840217899531126, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1831262707710266, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15738680958747864, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1465478390455246, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12088461965322495, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0848502442240715, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07399183511734009, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10293339192867279, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09398045390844345, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08785679191350937, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0670640766620636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.060443416237831116, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05314305052161217, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04569016396999359, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04157855734229088, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04056437313556671, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02674393728375435, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.022763004526495934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.022382184863090515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019521480426192284, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018865078687667847, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014987079426646233, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016058001667261124, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013716636225581169, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012445314787328243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1742590069770813, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.164024218916893, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1608808934688568, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1464506983757019, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08163022249937057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07807600498199463, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09036494791507721, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08370830863714218, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08272938430309296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07383126765489578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07017698884010315, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04594685509800911, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04000864923000336, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03906620666384697, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03884512186050415, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022947166115045547, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020091233775019646, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02003181353211403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018510691821575165, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018376080319285393, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.0120121194049716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012108427472412586, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011701982468366623, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008037574589252472, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23582549393177032, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22205904126167297, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21789926290512085, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19846338033676147, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11046658456325531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10569053143262863, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12220314890146255, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11318177729845047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1119355708360672, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09994897991418839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09501954913139343, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062052980065345764, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05400334298610687, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052769072353839874, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052478790283203125, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03097688779234886, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026827827095985413, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026742225512862206, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02465883269906044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02446960099041462, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016018738970160484, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015687979757785797, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015597797930240631, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009763636626303196, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.6.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2273510843515396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2056504338979721, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19741132855415344, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17593654990196228, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10448979586362839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0962083637714386, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12122781574726105, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11169804632663727, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10734226554632187, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0909004956483841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0857231393456459, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.061756085604429245, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05354166403412819, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05027757212519646, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04949048534035683, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030970297753810883, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02637433633208275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026097187772393227, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02373422682285309, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023209374397993088, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016684802249073982, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016978908330202103, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01564732939004898, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011946083046495914, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0835205689072609, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07755818963050842, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07519623637199402, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06766137480735779, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.038668833673000336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03634778782725334, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04414183273911476, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.040801726281642914, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03930642455816269, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.034426167607307434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.032598916441202164, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022412510588765144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019486267119646072, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01852521300315857, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018297001719474792, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011206052266061306, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009573351591825485, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009479240514338017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008722847327589989, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00857613980770111, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005865751765668392, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005924156401306391, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005539842415601015, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003957192879170179, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07035123556852341, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.06539473682641983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06324166059494019, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.05692853778600693, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.032595906406641006, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03058023191988468, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.03760959953069687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.034681111574172974, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.033153023570775986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.029081694781780243, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.02760492078959942, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.019056176766753197, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.01656177081167698, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01562798209488392, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.015402882359921932, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.009532440453767776, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.008072423748672009, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.007974735461175442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.007361237891018391, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007215775083750486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.004978260491043329, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005012056324630976, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.004668927285820246, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003325848840177059, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19938251376152039, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1862238645553589, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18185563385486603, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1643192619085312, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09311780333518982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08834721893072128, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10464801639318466, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09630291908979416, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09454917907714844, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08332129567861557, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07880841195583344, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05319518223404884, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04594153165817261, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04449675232172012, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044149432331323624, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02653546817600727, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02263646386563778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02252170816063881, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020600542426109314, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020379478111863136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.013659308664500713, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013323921710252762, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013086436316370964, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0082698380574584, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2027430534362793, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17470593750476837, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16450601816177368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13849109411239624, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09274594485759735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08203278481960297, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11163974553346634, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10182220488786697, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09707411378622055, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07567571103572845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0691065788269043, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05766824260354042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04988398775458336, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04573999345302582, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044706616550683975, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0293166134506464, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025551436468958855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025258036330342293, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02250393107533455, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021893072873353958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016794130206108093, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018476052209734917, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015564626082777977, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014851730316877365, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16521227359771729, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1554524004459381, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15239505469799042, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13863535225391388, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07751312106847763, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07406499981880188, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08598575741052628, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0795975849032402, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07855759561061859, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07005250453948975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06655003130435944, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04374491423368454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.038094934076070786, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.037159088999032974, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03693949058651924, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02188125252723694, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019204651936888695, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019139926880598068, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017701420933008194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017562968656420708, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011527813039720058, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011719249188899994, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011219489388167858, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007948952727019787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23191802203655243, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21826906502246857, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21409419178962708, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19495399296283722, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10895496606826782, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10420409590005875, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12083832919597626, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11173678934574127, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1103990226984024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09851185977458954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09369231760501862, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061502326279878616, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05345247685909271, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05220798775553703, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05190815404057503, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030722251161932945, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026933783665299416, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026854874566197395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024832310155034065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02464202232658863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016162242740392685, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016355840489268303, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015742037445306778, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011019695550203323, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.7.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22091823816299438, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19922982156276703, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19110196828842163, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16983599960803986, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.101511150598526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0932605117559433, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11853126436471939, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1085221990942955, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10429760068655014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08793140947818756, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08286991715431213, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06030833348631859, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.052181974053382874, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04903750494122505, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.048272404819726944, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030482474714517593, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026072358712553978, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025811681523919106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023479465395212173, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022981371730566025, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016850292682647705, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017225094139575958, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01586287096142769, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012630132026970387, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09350518137216568, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08699442446231842, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08439565449953079, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07598096132278442, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04330875352025032, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04076441004872322, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04941349849104881, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.045619942247867584, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04402130842208862, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.038627270609140396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03658240661025047, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02505505643785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021796176210045815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020758653059601784, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020511053502559662, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012524197809398174, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010730468668043613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010634477250277996, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009789745323359966, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009638072922825813, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0065708220936357975, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00664075743407011, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0062256986275315285, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004451235290616751, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.07664470374584198, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07133254408836365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.06902579218149185, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06216760352253914, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03553894907236099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.033382561057806015, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04081867262721062, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.03770725056529045, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.036106761544942856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03171519190073013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.030097804963588715, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.020698431879281998, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.018008267506957054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.017039017751812935, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.016800742596387863, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.010355307720601559, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00880271103233099, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.008703337050974369, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008035782724618912, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.007885466329753399, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005423130467534065, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005464768968522549, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005107791628688574, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0036374512128531933, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21072417497634888, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19694043695926666, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1924123615026474, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17378593981266022, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09838373959064484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09337445348501205, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11075858026742935, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10173114389181137, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09988965839147568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0880822017788887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08348459005355835, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.056272637099027634, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04853024333715439, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04700875282287598, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04664246365427971, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028092682361602783, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023895716294646263, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02377738431096077, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021756980568170547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021524887531995773, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014467070810496807, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014046255499124527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013839258812367916, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008687760680913925, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19407764077186584, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17115984857082367, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16363783180713654, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14008569717407227, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09034907072782516, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08174026757478714, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10429103672504425, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0958297923207283, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0928926169872284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07465291023254395, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06738833338022232, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05389709025621414, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04667283222079277, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04423626884818077, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.043655265122652054, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027300110086798668, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02420109137892723, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024027541279792786, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021304788067936897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02093989960849285, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01550285704433918, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016755983233451843, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014783008955419064, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013180983252823353, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16663935780525208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15678748488426208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15372423827648163, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13993704319000244, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07834021747112274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07486311346292496, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08689237385988235, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08047622442245483, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07941758632659912, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0708332359790802, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0673089325428009, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04430915042757988, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03861052542924881, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03765309974551201, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.037435922771692276, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022165821865200996, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.019626103341579437, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019560949876904488, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018128296360373497, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017992470413446426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01177254132926464, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01222696341574192, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011460578069090843, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00859900750219822, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23017725348472595, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21662694215774536, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21245846152305603, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19349071383476257, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10796810686588287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10318595170974731, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11978890001773834, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11073480546474457, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10938919335603714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09761778265237808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09279024600982666, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06080436334013939, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05287260189652443, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05161946639418602, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05131453275680542, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030391249805688858, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02630174532532692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02621905319392681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02416607365012169, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023974692448973656, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015803007408976555, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01548269484192133, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015370048582553864, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00976300798356533, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.8.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22535528242588043, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20308353006839752, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19487296044826508, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17316533625125885, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10363250225782394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09515691548585892, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12062033265829086, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11064304411411285, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10647443681955338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0896439179778099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08443136513233185, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06134825572371483, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05313167721033096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04997938126325607, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.049214430153369904, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03085150755941868, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026399046182632446, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026139916852116585, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02370350994169712, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02319600060582161, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01677483320236206, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01722240075469017, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0157503392547369, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012400353327393532, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0997600331902504, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09290669113397598, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.090174600481987, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0813826471567154, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.046373188495635986, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04370541870594025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05284344404935837, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04878675192594528, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04711153358221054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.041457399725914, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.039327047765254974, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026918772608041763, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02343173138797283, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022349560633301735, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02209264226257801, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013499872758984566, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011763861402869225, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011665774509310722, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010805020108819008, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010648101568222046, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0072079915553331375, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007570186164230108, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006847744341939688, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00544010940939188, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0844496488571167, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07869520783424377, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07624024152755737, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06886012852191925, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03925856947898865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.036947838962078094, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04499077424407005, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04152851551771164, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.03988521173596382, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.035119038075208664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03337203338742256, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022858938202261925, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019844122231006622, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018827736377716064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01858268678188324, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011431148275732994, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009736120700836182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009632252156734467, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008905592374503613, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008751031011343002, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006002440582960844, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006042683031409979, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005667880643159151, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0040396638214588165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20804214477539062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1946030855178833, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19018858671188354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1719209849834442, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09726400673389435, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09237620234489441, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10958318412303925, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.100611612200737, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09871400892734528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08716654032468796, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08267735689878464, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05568147823214531, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04802662879228592, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0464969202876091, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04612937569618225, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027791298925876617, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02368466928601265, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023563982918858528, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021595602855086327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021364282816648483, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014359231106936932, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013998684473335743, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013746903277933598, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00876123458147049, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20633859932422638, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17925836145877838, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1674518883228302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13836513459682465, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09498675912618637, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0841052234172821, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11574699729681015, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10585429519414902, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09862104058265686, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0765652060508728, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0692937895655632, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059822503477334976, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051860276609659195, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04703346639871597, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04582846909761429, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0303738322108984, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026463203132152557, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026037313044071198, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023205997422337532, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022475983947515488, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017463108524680138, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01940692961215973, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01602703332901001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015688564628362656, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17625468969345093, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16585902869701385, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.162547767162323, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14795443415641785, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08290290832519531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07919760048389435, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09202505648136139, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08518089354038239, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08401521295309067, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07490591704845428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07122762501239777, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04694195091724396, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040863342583179474, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039838097989559174, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039592139422893524, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02347472496330738, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020741529762744904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020670857280492783, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019149979576468468, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019006244838237762, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012464798055589199, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01289050281047821, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012125110253691673, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00901905819773674, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2363193780183792, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2224082052707672, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21813613176345825, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1986273229122162, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11093953251838684, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10606707632541656, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12311115860939026, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11382058262825012, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11240353435277939, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10026595741510391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09528471529483795, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062527135014534, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05434667319059372, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0530366376042366, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05272488668560982, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0312181543558836, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02701348066329956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026918549090623856, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024809975177049637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024606646969914436, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01619653031229973, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015871966257691383, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01574263535439968, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009975774213671684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.9.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2369331419467926, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21364201605319977, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2047645002603531, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18156395852565765, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10916867107152939, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10016436874866486, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1271607130765915, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11696631461381912, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11219734698534012, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09441857784986496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.088725745677948, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06495694816112518, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05629375949501991, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.052744586020708084, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05190315097570419, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032708995044231415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028063567355275154, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027762537822127342, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025237178429961205, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024681460112333298, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017924504354596138, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018569588661193848, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01678372174501419, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01361794862896204, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10656721889972687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09925893694162369, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09643032401800156, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08694805204868317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04948291555047035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04666059464216232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05622748285531998, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0519503615796566, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05025944113731384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04420554265379906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0418577641248703, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028567923232913017, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02486121654510498, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023749692365527153, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0234849750995636, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014294476248323917, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012326443567872047, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01222159992903471, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01127976831048727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011114465072751045, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007524739485234022, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007695144973695278, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007148637901991606, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005259209778159857, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08678892254829407, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08088448643684387, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07835783064365387, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07062126696109772, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0402534194290638, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03787811100482941, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04619995877146721, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04266007989645004, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04089280217885971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.035988952964544296, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03413908928632736, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.023405548185110092, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020370112732052803, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.01930147036910057, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01904459483921528, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011714193038642406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009977237321436405, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009870151057839394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009118836373090744, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00895417109131813, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006141718942672014, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006191564723849297, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005796849261969328, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004132237285375595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20860053598880768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1948627084493637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19012245535850525, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17175154387950897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0973530113697052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09230600297451019, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10960303246974945, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10106568783521652, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09880878776311874, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08716154843568802, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08245272934436798, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05566452443599701, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04825705289840698, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04653477668762207, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04613436758518219, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02777020074427128, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02372504584491253, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023577183485031128, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021622609347105026, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021367434412240982, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01434176042675972, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014086970128118992, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013709591701626778, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008854815736413002, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22047829627990723, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19532598555088043, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1856962889432907, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15777815878391266, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10273733735084534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09256356209516525, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12081299722194672, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11090230941772461, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10594426840543747, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08550106734037399, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07688659429550171, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06261700391769409, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05426178500056267, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050554487854242325, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04964430630207062, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031826045364141464, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028056705370545387, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02776065096259117, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024912089109420776, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02437732182443142, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01825156807899475, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019949620589613914, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017174599692225456, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015959640964865685, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.171627014875412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16133908927440643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15810096263885498, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14371246099472046, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08074816316366196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07706280797719955, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08979185670614243, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08305037021636963, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08183927088975906, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07286570221185684, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0692986324429512, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04588651284575462, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03990356996655464, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03887799382209778, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03864462673664093, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022977108135819435, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020381329581141472, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020310794934630394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018828317523002625, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018680432811379433, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012339010834693909, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012860780581831932, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012006713077425957, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009212151169776917, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23384365439414978, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2199087291955948, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2156444489955902, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19599023461341858, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10968098044395447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10474374890327454, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12189695984125137, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11256897449493408, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11115007102489471, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09895634651184082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09398683160543442, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061811819672584534, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05376908555626869, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052454594522714615, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0521385557949543, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03091857209801674, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026760170236229897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026668787002563477, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024552518501877785, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024356067180633545, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016149977222085, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01580161601305008, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015703897923231125, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010024623945355415, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.10.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23962846398353577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21619758009910583, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.207618847489357, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18393121659755707, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1104421615600586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10157729685306549, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1273830533027649, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11753568798303604, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11338528245687485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09539686143398285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0894109234213829, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06504123657941818, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056499943137168884, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.053269051015377045, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05248350650072098, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032731179147958755, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028164327144622803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027893930673599243, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025252392515540123, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02473042905330658, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017867816612124443, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018371278420090675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01687335968017578, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01327761821448803, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10488075017929077, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0977649986743927, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09491833299398422, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08552094548940659, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048722025007009506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04595055803656578, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05545689910650253, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05125366896390915, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04949576035141945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04352624714374542, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0411994531750679, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028163129463791847, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024490365758538246, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023358022794127464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023086359724402428, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01408129557967186, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012074803002178669, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011967179365456104, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011028924956917763, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010857329703867435, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0073831528425216675, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0074650137685239315, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0070028156042099, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005003818776458502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0870264321565628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08113164454698563, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0785827711224556, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07085944712162018, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.040447432547807693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03803946450352669, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.046324849128723145, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04286506026983261, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04105358198285103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.036150429397821426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.034268155694007874, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02350611239671707, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020477062091231346, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019381409510970116, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01911815255880356, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011755097657442093, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.00998462364077568, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009871339425444603, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009119515307247639, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00895270798355341, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006147570908069611, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006162980571389198, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005799867678433657, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004060541745275259, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20907585322856903, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1952534168958664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19036982953548431, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17187784612178802, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09759420901536942, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09247142821550369, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11009274423122406, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10141755640506744, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09908843040466309, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08727473020553589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0825612023472786, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.055893365293741226, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.048395588994026184, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.046652305871248245, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04622800275683403, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027896439656615257, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023785963654518127, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023638660088181496, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021672995761036873, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021404625847935677, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014400358311831951, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014139124192297459, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013752440921962261, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.008883490227162838, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2130970060825348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18776065111160278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.176222562789917, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15427331626415253, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09847065806388855, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08779600262641907, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12033971399068832, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11027473211288452, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.102686807513237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08329135179519653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07778709381818771, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.062138840556144714, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05373261868953705, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0482860803604126, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04691775143146515, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03119255229830742, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02650527097284794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026036201044917107, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02370915375649929, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0228818841278553, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01715606451034546, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018852612003684044, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01544656790792942, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014580232091248035, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17425818741321564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16367371380329132, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.160289004445076, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1456049084663391, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08198702335357666, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07819350063800812, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09124504029750824, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08442138135433197, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08309830725193024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07392305135726929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07020705938339233, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04662609472870827, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040583863854408264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03949109837412834, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039236195385456085, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023340759798884392, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020730098709464073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020656010136008263, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019150113686919212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018994415178894997, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012521548196673393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013138591311872005, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01216554269194603, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009450172074139118, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2305525690317154, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2165864259004593, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21221430599689484, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19274422526359558, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10813817381858826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1032034233212471, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12018392980098724, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1111227422952652, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10960675776004791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09746198356151581, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09240701049566269, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06101299822330475, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0530480220913887, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05170706287026405, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051384612917900085, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030462097376585007, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026352185755968094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026259073987603188, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024154920130968094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02395356073975563, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015759365633130074, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015530993230640888, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015293695963919163, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009787453338503838, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.11.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24233630299568176, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2177809178829193, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2087508738040924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18401983380317688, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11188236624002457, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10238365083932877, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12954364717006683, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11933506280183792, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11496663093566895, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09590885043144226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08969544619321823, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0663505494594574, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0574311688542366, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0539630651473999, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05314250662922859, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.033422332257032394, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028520049527287483, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02824144810438156, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025431007146835327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024890221655368805, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01838327944278717, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01861877553164959, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017328474670648575, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013444313779473305, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1103072389960289, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1028025671839714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09989271312952042, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09002511203289032, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051262129098176956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048348721116781235, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05819745734333992, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0537862591445446, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.052062343806028366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04575059935450554, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043310604989528656, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02953646332025528, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02567874640226364, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02455383911728859, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024277856573462486, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014766239561140537, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012628596276044846, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012521595694124699, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011515917256474495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011345645412802696, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007701651193201542, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007714452221989632, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007318992167711258, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005065132863819599, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0907917320728302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08470255136489868, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08208474516868591, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07401420921087265, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.042224377393722534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03972342610359192, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04831907898187637, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04469379037618637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.042869504541158676, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0377611368894577, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03576292470097542, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024479862302541733, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02133830077946186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02022378146648407, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019958628341555595, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012239416129887104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010415040887892246, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010299497283995152, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009517558850347996, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00934639573097229, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006394052878022194, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006407750304788351, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006040416657924652, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004201378207653761, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21456988155841827, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20033873617649078, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19534124433994293, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17625871300697327, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10025415569543839, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09488014131784439, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11311313509941101, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10426962375640869, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10178449004888535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08965059369802475, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08472335338592529, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.057476699352264404, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.049844738095998764, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04796624556183815, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04751548171043396, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028727451339364052, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024522077292203903, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024354752153158188, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02234267070889473, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02205582708120346, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014911861158907413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014690330252051353, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014232982881367207, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009353508241474628, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22272180020809174, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1971927434206009, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18816301226615906, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16072534024715424, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10335348546504974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09381800144910812, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12113591283559799, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11092977225780487, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10658884793519974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08618436008691788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07889953255653381, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06235894188284874, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053889479488134384, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05047066509723663, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04966257885098457, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03145095333456993, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027478452771902084, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027226753532886505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02428479865193367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023783741518855095, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017541054636240005, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018949130550026894, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016511179506778717, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014706488698720932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17847976088523865, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1676403135061264, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16413871943950653, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1490100622177124, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08407039195299149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08018359541893005, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09354715794324875, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08655092120170593, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08522825688123703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07576460391283035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0719454288482666, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0478128045797348, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041651975363492966, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040536195039749146, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04026905819773674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023937871679663658, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021330110728740692, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02124861627817154, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01969916932284832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01954052597284317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012833221815526485, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013580413535237312, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012467587366700172, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009849246591329575, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2335473895072937, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21940653026103973, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21499726176261902, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19520561397075653, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10963080823421478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10458799451589584, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12185836583375931, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11267560720443726, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11114107817411423, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0987599715590477, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0936715304851532, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061939265578985214, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05379127338528633, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05240928754210472, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052085030823946, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03091561608016491, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02668708749115467, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026591718196868896, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024446537718176842, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024232566356658936, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016003616154193878, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01568574458360672, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015518661588430405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0098268399015069, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.12.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24545620381832123, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22065962851047516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2115597426891327, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18630351126194, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11329229921102524, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10389160364866257, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13105574250221252, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12078998237848282, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11645692586898804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09716556966304779, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09083714336156845, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06706605106592178, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058161843568086624, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05475671961903572, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05392620712518692, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03380788117647171, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029067931696772575, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028793971985578537, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02596786804497242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02541775070130825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018596960231661797, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019129658117890358, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017563654109835625, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013984668999910355, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1155165359377861, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1076684221625328, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10466951876878738, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09440705925226212, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05372712388634682, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05071573331952095, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06094219535589218, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.056317757815122604, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05457271635532379, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.048008695244789124, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04547367990016937, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03097641095519066, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026968836784362793, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025791019201278687, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02551095187664032, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015504958108067513, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013383743353188038, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013275430537760258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01223943941295147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012066476047039032, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008157162927091122, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008336169645190239, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007769113406538963, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005685788579285145, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09215562045574188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0859471783041954, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08316244930028915, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07499475032091141, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04278355836868286, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04019496962428093, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04919110983610153, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.045521482825279236, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04344269260764122, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0382639579474926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03631613776087761, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024943996220827103, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021723516285419464, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020508937537670135, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02021530270576477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012481395155191422, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01057845912873745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010450469329953194, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009670397266745567, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009481972083449364, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006533183157444, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0065475404262542725, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006147131323814392, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004316878505051136, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22191542387008667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20734691619873047, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2023257613182068, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1826561987400055, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10374203324317932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09833192080259323, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11693131178617477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10769802331924438, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10533407330513, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09286658465862274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08782213181257248, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05946560949087143, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05149664729833603, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.049646127969026566, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049199335277080536, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029700346291065216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025381527841091156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02522111125290394, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023143112659454346, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022858252748847008, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015411864966154099, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015186495147645473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014729401096701622, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009700520895421505, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2320500761270523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20625734329223633, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1973901093006134, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16546069085597992, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1081821620464325, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09843770414590836, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1259610801935196, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11495912820100784, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11129680275917053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08961701393127441, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08054352551698685, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0652327910065651, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05625813454389572, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05318550020456314, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05246661230921745, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03314617648720741, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029319502413272858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029108498245477676, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025926191359758377, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025466879829764366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.019011294469237328, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.02055373787879944, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.018115468323230743, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.016362272202968597, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17947885394096375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16850267350673676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16497167944908142, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14990106225013733, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08462072163820267, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08068747818470001, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09412028640508652, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08711255341768265, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08579026162624359, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07625764608383179, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07248074561357498, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04815980792045593, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041924215853214264, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04081118106842041, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04055144637823105, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02413465455174446, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021466858685016632, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02138786017894745, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019815484061837196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019660763442516327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013006561435759068, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013658863492310047, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012644699774682522, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009901542216539383, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23949459195137024, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2248965948820114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22045759856700897, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2002623975276947, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11247232556343079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10732030868530273, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1249612420797348, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11554303020238876, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11403254419565201, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10131441801786423, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0962282046675682, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06353037804365158, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05520243942737579, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.053813476115465164, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05348554998636246, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03176111355423927, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027454184368252754, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0273659136146307, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025154348462820053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024942126125097275, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016503866761922836, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016215406358242035, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016041891649365425, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010277227498590946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.13.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.247678741812706, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22322940826416016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21414802968502045, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18920695781707764, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11425463855266571, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10493168234825134, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13256323337554932, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12179171293973923, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11735009402036667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09832832962274551, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09210475534200668, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06747747957706451, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05839712172746658, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05503341555595398, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054224591702222824, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03385384753346443, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02893354743719101, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02865499258041382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02583444118499756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0252884142100811, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018276602029800415, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01867363043129444, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017191192135214806, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013255758211016655, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11931666731834412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11132656037807465, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10827447474002838, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09773485362529755, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05554923787713051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05248049646615982, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06295185536146164, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05816660448908806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05639731511473656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04969712719321251, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04708629846572876, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03196340054273605, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027793994173407555, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026605959981679916, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02632380649447441, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015976080670952797, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013704686425626278, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013591405935585499, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012520371936261654, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012342812493443489, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008331472985446453, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008379848673939705, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007922547869384289, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005532592535018921, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09474947303533554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08843729645013809, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08567419648170471, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07737018913030624, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04406357184052467, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04147360473871231, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.050485435873270035, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.046776000410318375, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.044744376093149185, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03948507457971573, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0374203696846962, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025571607053279877, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02231491170823574, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021103698760271072, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020813854411244392, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012784093618392944, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01084189210087061, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010715009644627571, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00991748832166195, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.00972933042794466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006661456078290939, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006650645285844803, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006277075503021479, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004313441924750805, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22805482149124146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21337345242500305, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20795997977256775, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1880301833152771, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10684280842542648, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10117695480585098, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12076441943645477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11124241352081299, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10844217240810394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09578654915094376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09065783023834229, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061374880373477936, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05322601646184921, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051131896674633026, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05061621591448784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03068017214536667, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026095157489180565, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025899559259414673, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023824304342269897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023493122309446335, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01591465435922146, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015579578466713428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015166262164711952, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0098172128200531, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23279400169849396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2112482190132141, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20457600057125092, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18007418513298035, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10876436531543732, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10056652128696442, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12422522157430649, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1137753576040268, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11120342463254929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09316021203994751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08696208149194717, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06406750530004501, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055525414645671844, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05322940647602081, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05270163714885712, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03259773552417755, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029095888137817383, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028947150334715843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02619551308453083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02586442418396473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.018707506358623505, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.020004883408546448, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01801123283803463, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01576538383960724, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18835337460041046, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1767907291650772, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17312034964561462, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15728545188903809, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08879382163286209, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08464819937944412, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09877259284257889, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09142578393220901, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0900268629193306, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07998307049274445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07592165470123291, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050462063401937485, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04392692446708679, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04274400323629379, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.042467646300792694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02524041198194027, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022363409399986267, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022282784804701805, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02061941847205162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02044990472495556, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013471849262714386, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014066150411963463, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01308399997651577, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010005875490605831, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24454611539840698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2297418713569641, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2252073884010315, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2044609785079956, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11500098556280136, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1097026839852333, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12768061459064484, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1181333139538765, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11656638979911804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10356616973876953, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09819686412811279, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06487641483545303, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05643297731876373, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05499609559774399, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.054655250161886215, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.032385677099227905, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0280211940407753, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027922047302126884, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025664737448096275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025444908067584038, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01675107143819332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01648886501789093, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016257716342806816, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010365035384893417, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.14.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25227659940719604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22698679566383362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2175971418619156, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19193576276302338, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11628197133541107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1066446453332901, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1347087323665619, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12412921339273453, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11955439299345016, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09991196542978287, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09349222481250763, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.068770632147789, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05948257073760033, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.055914364755153656, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05505404248833656, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03454333916306496, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029185829684138298, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02889128588140011, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02596457302570343, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02538420632481575, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01862027309834957, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01858365908265114, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017492253333330154, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012865321710705757, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11425664275884628, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1066892147064209, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1037788987159729, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09375131875276566, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05330054461956024, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.050376102328300476, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06049206480383873, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05585893988609314, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.054117415100336075, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04772555083036423, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04525596275925636, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030727598816156387, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026704050600528717, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02553575299680233, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02525000274181366, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015362710691988468, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013127910904586315, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013015453703701496, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011999262496829033, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01182107999920845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008015196770429611, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008003800176084042, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007624657358974218, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00524047901853919, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09409674257040024, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08787602931261063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0851249173283577, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07685275375843048, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04381516948342323, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04122249782085419, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05019475519657135, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04650379717350006, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.044495150446891785, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.039264921098947525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.037203896790742874, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025427227839827538, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022197594866156578, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020997805520892143, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02070719003677368, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012711468152701855, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01080871932208538, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010681580752134323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009884021244943142, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009701569564640522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0066415355540812016, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00665750727057457, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006265005562454462, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004361534956842661, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23338869214057922, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21844884753227234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21322500705718994, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.192773699760437, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10949987173080444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10383236408233643, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12349221110343933, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1136978343129158, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11108102649450302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09818383306264877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09297927469015121, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06281209737062454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05438031256198883, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052390873432159424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05191986262798309, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03137776628136635, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02673817053437233, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026564521715044975, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02441546693444252, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024105573073029518, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016275182366371155, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015959061682224274, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015543901361525059, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0100944135338068, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21602261066436768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.192581444978714, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.181890070438385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16042611002922058, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09961088746786118, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08985262364149094, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12163805961608887, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11148999631404877, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10376784205436707, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08618822693824768, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08017855137586594, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06246234476566315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054006047546863556, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04854332283139229, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04717665910720825, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03142678365111351, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02613578923046589, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025630179792642593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023578139021992683, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02272813394665718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01697935163974762, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018003856763243675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015219303779304028, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013323884457349777, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1896103173494339, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17808754742145538, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17442455887794495, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15853117406368256, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08948659896850586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08535091578960419, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09952032566070557, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09209588170051575, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0907554030418396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08066210150718689, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07658683508634567, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050841182470321655, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.044318076223134995, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043144531548023224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04286682605743408, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025467300787568092, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022655190899968147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022569164633750916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02091056853532791, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.020736631006002426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01365498173981905, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014356975443661213, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013276497833430767, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010337760671973228, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24786007404327393, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23292547464370728, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22832311689853668, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20741084218025208, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11665535718202591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11133544147014618, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1295984536409378, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11981678009033203, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11825580894947052, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10509215295314789, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09964454174041748, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06587295234203339, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05724823847413063, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05580933764576912, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05547034367918968, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03289897367358208, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02845301665365696, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028353165835142136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026065098121762276, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0258512943983078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01710880547761917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016773147508502007, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016620943322777748, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010591437108814716, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.15.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2518852651119232, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22634655237197876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21663211286067963, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19109919667243958, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11609905958175659, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10623965412378311, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13505063951015472, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1244012787938118, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11945020407438278, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09966811537742615, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09334447234869003, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06894591450691223, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05960272252559662, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.055848486721515656, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054946646094322205, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03465253859758377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029208553954958916, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02888522855937481, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025979289785027504, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025360943749547005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018714671954512596, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018702520057559013, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017506444826722145, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012982458807528019, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12084323912858963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11239582300186157, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1089152991771698, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0980720967054367, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05650985985994339, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05307266116142273, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06477081775665283, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059727612882852554, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057441916316747665, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05029917508363724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047656986862421036, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03294892609119415, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02859683893620968, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02714415080845356, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0267967376857996, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016487451270222664, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01406670268625021, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013928689993917942, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012823827564716339, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01260556560009718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008672828786075115, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008772690780460835, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008190526627004147, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0059282733127474785, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10174880921840668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09468071162700653, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09145717322826385, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08239659667015076, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.047569144517183304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04456143453717232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.054984092712402344, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05075114965438843, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04835112392902374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.042378004640340805, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0402306504547596, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02796068601310253, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024270888417959213, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022847885265946388, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022498799487948418, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013974235393106937, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011827506124973297, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011672340333461761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010784738697111607, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010564959608018398, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007345150224864483, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007386797573417425, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006895818747580051, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004931105300784111, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24329251050949097, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22705163061618805, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22164252400398254, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19996967911720276, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11421901732683182, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10824653506278992, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12915000319480896, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1182515025138855, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11600923538208008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10195919871330261, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09667910635471344, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06575503945350647, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05647638067603111, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054635703563690186, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05419446527957916, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03282042220234871, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027795415371656418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02765476144850254, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025256440043449402, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024977106600999832, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016918474808335304, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01636938564479351, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016132419928908348, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010154874064028263, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12754857540130615, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11965836584568024, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1172819435596466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10542969405651093, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06009189411997795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05724307894706726, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.067518450319767, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.062002770602703094, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06097589433193207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.053846459835767746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05096025392413139, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03507065400481224, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030395328998565674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029535161331295967, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02933799847960472, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017971575260162354, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.016336601227521896, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01627390645444393, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01518785860389471, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01506133284419775, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.010546893812716007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.011357574723660946, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01029183715581894, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00916628260165453, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17953193187713623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16852129995822906, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1650051474571228, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14972776174545288, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08478794246912003, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08081745356321335, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09458673745393753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0872998833656311, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08599107712507248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07629790157079697, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07253523170948029, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048426542431116104, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04193757101893425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.040815263986587524, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.040553852915763855, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024185175076127052, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021371077746152878, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02129300683736801, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01969134248793125, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01952807791531086, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013036077842116356, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01346697099506855, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012673981487751007, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009594965726137161, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2483772337436676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2332289218902588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22850534319877625, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20743365585803986, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11705232411623001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11161930859088898, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13053712248802185, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12034058570861816, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11869461089372635, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1052996814250946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1000019982457161, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06647855788469315, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.057512860745191574, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.056035492569208145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.055675726383924484, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03317061811685562, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028667282313108444, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0285648200660944, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02625807747244835, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026030173525214195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017441222444176674, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0170697383582592, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016937466338276863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01097193080931902, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.16.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23867422342300415, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21856901049613953, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2112765908241272, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18814601004123688, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11088532209396362, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10337755084037781, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1273520439863205, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1167922243475914, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11333280801773071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09719978272914886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09136012941598892, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06493674218654633, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056074295192956924, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.053393181413412094, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05274730175733566, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03258271515369415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028046265244483948, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027822071686387062, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02537364512681961, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024951228871941566, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017701206728816032, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01796114072203636, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01682412251830101, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01277577318251133, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11096378415822983, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10330784320831299, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10010179877281189, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09022612124681473, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05195019766688347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04882396012544632, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05954420566558838, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05493360012769699, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05279636010527611, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04633930325508118, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04394150897860527, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030388912186026573, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02642437070608139, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02506961300969124, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024742498993873596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015235045924782753, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013209894299507141, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013076240196824074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012120847590267658, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011915935203433037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00814049318432808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00853776652365923, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0076943314634263515, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006126302760094404, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09708268195390701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09044712781906128, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08748957514762878, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07892981171607971, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04539331793785095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04263217747211456, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.052406586706638336, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04833458736538887, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.046151190996170044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040546104311943054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03853446617722511, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026657549664378166, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023120755329728127, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021807614713907242, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02149236761033535, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01333189383149147, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011289146728813648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01115439087152481, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010318869724869728, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010117940604686737, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007021630182862282, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007057469338178635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0065940809436142445, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004734461661428213, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2306390106678009, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2155015468597412, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21017877757549286, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18991653621196747, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10831551998853683, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10258860141038895, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12252604216337204, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11259686946868896, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10998310893774033, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09689206629991531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09176315367221832, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06234508007764816, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053837332874536514, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051862265914678574, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051382169127464294, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031134221702814102, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026447651907801628, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02627212554216385, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02409263700246811, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023787936195731163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01611342467367649, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015736466273665428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015370539389550686, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009889233857393265, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17993150651454926, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15564846992492676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14639152586460114, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1252511590719223, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08328563719987869, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07379231601953506, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09891097247600555, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09040190279483795, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08637576550245285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06757970154285431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06220116838812828, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.051226165145635605, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04429348185658455, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04114237800240517, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.040363069623708725, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.025980163365602493, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02297045849263668, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02273227833211422, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02018415369093418, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019695710390806198, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014955596067011356, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016527051106095314, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014014245942234993, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013321006670594215, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19177035987377167, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18025681376457214, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17651289701461792, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16039660573005676, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09067445248365402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08648931235074997, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10123579949140549, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09335294365882874, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09193744510412216, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08171074837446213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07772506028413773, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05188357084989548, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.044862598180770874, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043655794113874435, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.043371107429265976, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025934895500540733, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022865064442157745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0227799154818058, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02109139785170555, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02091609500348568, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014065563678741455, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014416422694921494, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013678155839443207, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010281221941113472, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2518758177757263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2368001639842987, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23202842473983765, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2107844352722168, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11874619871377945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11330465972423553, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1324196457862854, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12206094712018967, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12036984413862228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10697592049837112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10172604769468307, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06744104623794556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05831863358616829, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05681569129228592, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.056458692997694016, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.033627063035964966, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02903497777879238, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028929132968187332, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026623107492923737, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02639448083937168, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017667265608906746, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01724126562476158, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017157655209302902, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011022225022315979, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.17.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25211215019226074, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22897203266620636, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2203827053308487, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1957983672618866, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11696432530879974, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10821476578712463, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13522516191005707, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12424687296152115, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11991819739341736, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10176126658916473, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09555842727422714, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06923927366733551, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05980418995022774, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05654727667570114, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05575767159461975, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.034820299595594406, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030069248750805855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029794946312904358, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027151387184858322, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026629852131009102, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019198309630155563, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019804799929261208, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018155192956328392, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014556272886693478, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10976147651672363, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1022600382566452, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0991351529955864, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08932606130838394, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05128692090511322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048234470188617706, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05866129323840141, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.054208122193813324, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05208316072821617, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04576459154486656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04337277263402939, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029918324202299118, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025976769626140594, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02464922145009041, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024334566667675972, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014964640140533447, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01283818669617176, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012706546112895012, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011739660054445267, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011542163789272308, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007908540777862072, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008083759807050228, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007464463356882334, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005556405521929264, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09253256767988205, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08620236068964005, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08324280381202698, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07502472400665283, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0431300513446331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0404135137796402, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04996410384774208, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04612687602639198, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04382282868027687, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0385197214782238, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0365728959441185, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025348462164402008, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022045699879527092, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02070504054427147, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02037801407277584, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012684108689427376, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010731703601777554, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010585619136691093, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009804271161556244, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009599225595593452, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0066830855794250965, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006724985782057047, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00625698734074831, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004508872050791979, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22355873882770538, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20895370841026306, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20361819863319397, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1837969571352005, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10484756529331207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09917647391557693, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11880850791931152, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1093098446726799, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10641353577375412, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09382161498069763, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08878542482852936, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060434095561504364, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05225489288568497, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050178684294223785, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04968629404902458, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030157843604683876, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025619402527809143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02543042041361332, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023346124216914177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023025887086987495, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015613115392625332, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015311388298869133, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014846159145236015, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009664091281592846, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20900842547416687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18117445707321167, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17081832885742188, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1428920030593872, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09632250666618347, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0858733206987381, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1152496188879013, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10584530979394913, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10070531070232391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07913096994161606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0708872526884079, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05982362851500511, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0519704595208168, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04771158844232559, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04668042063713074, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030401982367038727, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02681300975382328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0265203770250082, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02377098798751831, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023148495703935623, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0174817256629467, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019554130733013153, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016222873702645302, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015859389677643776, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18682296574115753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17557916045188904, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1719517558813095, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1562870591878891, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08820018172264099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08411470800638199, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09834262728691101, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09085424244403839, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08940277993679047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07951415330171585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07563352584838867, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050340767949819565, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04367863014340401, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04248623177409172, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04220570996403694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025150775909423828, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022315768525004387, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022230856120586395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02061322145164013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02043997496366501, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013577215373516083, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014159418642520905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013192152604460716, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01019627321511507, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24387158453464508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22918976843357086, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22455385327339172, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20413388311862946, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11474388837814331, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10951529443264008, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1278621256351471, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11793936043977737, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11632469296455383, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10344420373439789, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09825979173183441, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06496506184339523, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05631490424275398, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05487260967493057, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05452590435743332, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0324026383459568, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028013715520501137, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027910828590393066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025690747424960136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02546667493879795, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01693187654018402, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016571126878261566, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01643991470336914, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010514294728636742, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.18.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2503977119922638, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2264021337032318, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21761228144168854, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1926344931125641, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11595910787582397, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10688237100839615, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13386951386928558, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12325917184352875, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11897920817136765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10026878118515015, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09409444034099579, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06855299323797226, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059327468276023865, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05603509396314621, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.055249810218811035, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03448837623000145, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0297757126390934, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029506728053092957, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026773791760206223, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02625584416091442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01905273087322712, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019604820758104324, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01804143190383911, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014393674209713936, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10619931668043137, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09901129454374313, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09590556472539902, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08645226061344147, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04953046143054962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04656485840678215, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.056848619133234024, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.052491217851638794, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05033144727349281, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04425393417477608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04196196421980858, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02891320176422596, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025116531178355217, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023765696212649345, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023439742624759674, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014464943669736385, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012311534956097603, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012171856127679348, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011252393946051598, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011050763539969921, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007597665768116713, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007677086163312197, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0071459198370575905, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005163661204278469, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09021995961666107, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08419918268918991, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08124975860118866, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07328993827104568, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04204258695244789, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.039417851716279984, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04870808869600296, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04506075009703636, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04270908609032631, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03762729465961456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03572767227888107, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024717997759580612, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021529588848352432, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020175620913505554, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019845591858029366, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012360206805169582, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010425846092402935, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010277213528752327, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009530378505587578, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009323649108409882, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006492003798484802, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006504731252789497, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006065081339329481, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00430518202483654, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21683716773986816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2026141881942749, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19732676446437836, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17825743556022644, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1015762984752655, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09609786421060562, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11542017757892609, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10600954294204712, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10310526192188263, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09089715033769608, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08611716330051422, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05862944573163986, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05065309256315231, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.048624187707901, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04814206063747406, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029291562736034393, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024854343384504318, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02466456964612007, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022659296169877052, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022346431389451027, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015154123306274414, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014884051866829395, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014391184784471989, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009435029700398445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20448271930217743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17602932453155518, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1629754602909088, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14185136556625366, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09428061544895172, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08124233037233353, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1176181435585022, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1070198118686676, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09848837554454803, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07805917412042618, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07267076522111893, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06055019423365593, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.052218351513147354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04627242684364319, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04478366672992706, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03045794926583767, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025451721623539925, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024917591363191605, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022482771426439285, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02157384343445301, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016847340390086174, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018271684646606445, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014985288493335247, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01411079615354538, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17960533499717712, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1688472181558609, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1652054488658905, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15013903379440308, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08475115895271301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08078011125326157, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09457088261842728, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08740266412496567, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08588872104883194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0764319971203804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07266417145729065, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048394665122032166, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04202995076775551, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04085809364914894, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04057317599654198, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024188676849007607, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02150336652994156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021415069699287415, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019873429089784622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01970750465989113, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013070440851151943, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013720831833779812, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012681547552347183, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009952561929821968, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23428378999233246, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22029262781143188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21585680544376373, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1961102932691574, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11016536504030228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10514462739229202, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12281009554862976, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11332590132951736, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1116836667060852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09934689104557037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09434284269809723, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062360893934965134, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05409897118806839, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052684105932712555, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05235214903950691, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031068062409758568, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02689461037516594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026789754629135132, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02466760016977787, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024456771090626717, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016147365793585777, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015926461666822433, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015648433938622475, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010111811570823193, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.19.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2492019683122635, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22500327229499817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21591876447200775, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19098544120788574, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11541582643985748, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10611703246831894, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13392172753810883, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12311860918998718, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11850342899560928, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0995887815952301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09332986176013947, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06868381053209305, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059231508523225784, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05573486164212227, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054888561367988586, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03459150344133377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029536060988903046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029253702610731125, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02649107202887535, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025936292484402657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019171705469489098, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019390763714909554, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018089517951011658, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01410162728279829, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11049218475818634, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10310083627700806, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10002471506595612, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09023045748472214, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05150432512164116, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048525985330343246, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.058890897780656815, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05434619262814522, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05230195075273514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.046045538038015366, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04365408420562744, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02990628592669964, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025961928069591522, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024677492678165436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.024372098967432976, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014950287528336048, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012723187915980816, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012595279142260551, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01162280049175024, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01142631471157074, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007818344980478287, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007827785797417164, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007383205462247133, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00516259903088212, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09227490425109863, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08619752526283264, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08327548950910568, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07519198954105377, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04299609363079071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0403611958026886, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04960642382502556, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04589809104800224, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.043643511831760406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.038496822118759155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03653988987207413, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025155745446681976, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021919118240475655, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02060011960566044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02027934230864048, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012558184564113617, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010617454536259174, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01047456730157137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009709415026009083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009508633986115456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006578209809958935, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006581366527825594, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0061642685905098915, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00431071687489748, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21828341484069824, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20415955781936646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1988144963979721, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17951223254203796, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10225074738264084, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09673871099948883, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11615052819252014, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10671831667423248, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.103800930082798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0915030837059021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08676435798406601, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059050340205430984, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051098112016916275, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04898177087306976, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04846745356917381, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029528994113206863, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02509450726211071, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02489432692527771, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022904161363840103, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022576218470931053, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015377594158053398, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015153156593441963, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014615054242312908, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009741208516061306, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2170485258102417, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19011695683002472, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18079274892807007, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15605083107948303, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10080825537443161, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0906989797949791, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11826151609420776, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1080297976732254, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1041160598397255, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08311418443918228, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07752227783203125, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06098159775137901, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05263405665755272, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04935649409890175, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04855987802147865, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03082144446671009, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027024613693356514, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026794306933879852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023790225386619568, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02329438552260399, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01743226684629917, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018824806436896324, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016448767855763435, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014774877578020096, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17941048741340637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16867539286613464, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16512799263000488, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15014265477657318, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08462557941675186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08073174953460693, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09439532458782196, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08723141998052597, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08579403907060623, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07637851685285568, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07262440025806427, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048280712217092514, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041960977017879486, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04080672934651375, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04052630811929703, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024121850728988647, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.021509867161512375, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021427327767014503, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01990523561835289, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019734051078557968, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013007513247430325, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013767901808023453, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01262875460088253, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010035908780992031, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23300494253635406, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21919646859169006, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21479438245296478, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1952960044145584, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.109586201608181, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10463079810142517, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1220940425992012, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11264447867870331, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11105197668075562, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09888593852519989, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09394101798534393, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062055155634880066, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053768884390592575, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.052380066365003586, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05204606056213379, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03092537634074688, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026703236624598503, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02660381607711315, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02449863590300083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024288538843393326, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01609034463763237, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01575908251106739, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015592426992952824, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009937284514307976, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.20.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2547803223133087, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22972041368484497, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22038336098194122, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1946043223142624, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11792309582233429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10835543274879456, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13678038120269775, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12560923397541046, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12109807878732681, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.1015048623085022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0951225683093071, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07018036395311356, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0605253241956234, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0570150688290596, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05617009103298187, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03538256883621216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03035125881433487, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0300744716078043, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027210230007767677, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026659216731786728, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019704487174749374, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.02007526159286499, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018623674288392067, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01479314174503088, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11804206669330597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11015184968709946, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10693566501140594, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09659156203269958, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055031269788742065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05188459903001785, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0626613199710846, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0579005666077137, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05586129054427147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0492243729531765, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046734508126974106, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03188399598002434, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027730027213692665, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026423713192343712, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02610992267727852, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0159478597342968, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01372341439127922, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013596753589808941, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012568132020533085, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012373500503599644, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00839940831065178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008585943840444088, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007960080169141293, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005867966916412115, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09518729895353317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0888562873005867, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08574945479631424, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07745441794395447, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04425525665283203, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.041489727795124054, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05121292546391487, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04739769920706749, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04492988437414169, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03965306282043457, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03766681253910065, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025981148704886436, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022632598876953125, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021216508001089096, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020870888605713844, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012986497953534126, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010954936034977436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010799171403050423, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010030665434896946, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009813033975660801, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006815367843955755, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006834907457232475, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0063753691501915455, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045142704620957375, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2239813208580017, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2096170037984848, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.20437148213386536, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18465274572372437, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10485882312059402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09937187284231186, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11860676854848862, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1090574562549591, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10638629645109177, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09398370236158371, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08908487856388092, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060370612889528275, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05218326672911644, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05018357187509537, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04970996454358101, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030171601101756096, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025699486956000328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025511179119348526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023470774292945862, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023159313946962357, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015681838616728783, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015448672696948051, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01493929699063301, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009902512654662132, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2200886309146881, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1955503225326538, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18701379001140594, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15908978879451752, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10253064334392548, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0934826210141182, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11973829567432404, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10943575203418732, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10588552802801132, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08527418226003647, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07763643562793732, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06194445118308067, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05333051458001137, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.050197914242744446, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049439191818237305, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03137557581067085, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02736322581768036, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027140840888023376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024141700938344002, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023675590753555298, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01775377430021763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018855614587664604, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01684633456170559, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014664494432508945, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17053760588169098, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16039617359638214, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1570086032152176, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14284692704677582, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08044611662626266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07675258815288544, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08967342227697372, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08285673707723618, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08154555410146713, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07261127233505249, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0691428855061531, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.045868031680583954, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03985198214650154, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038772132247686386, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03851254656910896, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022919995710253716, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020400743931531906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020318204537034035, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018864870071411133, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01870853826403618, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012365005910396576, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012997431680560112, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012013444676995277, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009420819580554962, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.231125146150589, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21743492782115936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2131769210100174, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19391103088855743, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10866276919841766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10377305746078491, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12094319611787796, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11163090169429779, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11010304093360901, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09807629883289337, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09323084354400635, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.061494216322898865, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05327989533543587, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05193572863936424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0516185536980629, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030640222132205963, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026474088430404663, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026382744312286377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02429262362420559, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024091817438602448, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01596391573548317, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015607286244630814, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015496582724153996, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00983124878257513, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.21.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2459362894296646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22169700264930725, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21242421865463257, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18792147934436798, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11357810348272324, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10424475371837616, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13269942998886108, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12152369320392609, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1166660338640213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09788099676370621, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09196476638317108, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06758886575698853, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058369677513837814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05480285733938217, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05393664911389351, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03389003872871399, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02899777702987194, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028701193630695343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025982065126299858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025415824726223946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01843983493745327, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01900511421263218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017300643026828766, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013743730261921883, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1208881139755249, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1130034476518631, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1098831370472908, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09937624633312225, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.056341081857681274, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05325157940387726, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06409347802400589, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05916319042444229, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05718694627285004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05054805800318718, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04798316955566406, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03256845101714134, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028264664113521576, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026993177831172943, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026690559461712837, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016270658001303673, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013920809142291546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013798722065985203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012752954848110676, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01255875639617443, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00850269291549921, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008554778061807156, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008061143569648266, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005674619227647781, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09625384956598282, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08999843895435333, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08706454187631607, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07874616235494614, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04476403817534447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04208454489707947, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05157102644443512, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04772427678108215, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04542485624551773, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04018158093094826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03819867596030235, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026117432862520218, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022770611569285393, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02143154665827751, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02111057937145233, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013048386201262474, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011026782914996147, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010880700312554836, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010102121159434319, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009896917268633842, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006820141337811947, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006798690650612116, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006406496744602919, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004424208775162697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2250516265630722, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21088357269763947, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2055191993713379, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18615132570266724, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10544193536043167, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09986398369073868, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11960962414741516, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10999295115470886, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10694887489080429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0947515070438385, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08985976874828339, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060827650129795074, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05266254022717476, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05047246813774109, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049938853830099106, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030407782644033432, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02579614706337452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025581203401088715, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02360006980597973, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023261049762368202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01581975817680359, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01548323780298233, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015038604848086834, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009817301295697689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2218521237373352, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2017163783311844, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19497428834438324, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.17351683974266052, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10358943045139313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09581731259822845, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11817136406898499, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10819633305072784, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10586938261985779, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08904486894607544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0836947113275528, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06104929372668266, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.052874136716127396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05078615993261337, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050300776958465576, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03109784424304962, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02783752791583538, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027701836079359055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02515404112637043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024848803877830505, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017912661656737328, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019241247326135635, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017293911427259445, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015267088077962399, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17211540043354034, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16188590228557587, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1585046947002411, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14420966804027557, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08108840882778168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07739461213350296, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0903598815202713, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08351482450962067, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08219484984874725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07319729030132294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06963446736335754, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04618921875953674, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04009567201137543, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03901083022356033, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038756366819143295, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02304045669734478, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02042222209274769, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02034207619726658, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018859507516026497, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018703818321228027, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012341397814452648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012861795723438263, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011987617239356041, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009158642031252384, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23161856830120087, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21796047687530518, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21367689967155457, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19450196623802185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10892440378665924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10404058545827866, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12114151567220688, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11187124252319336, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11039206385612488, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09837425500154495, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09354003518819809, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0615830235183239, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05340327322483063, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05205690488219261, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0517374686896801, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030674340203404427, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02652915194630623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026437189429998398, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02436205744743347, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024156253784894943, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01593826897442341, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015638021752238274, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015473711304366589, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00984589010477066, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.22.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24806426465511322, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2231411635875702, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21364861726760864, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18887051939964294, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11435926705598831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10472731292247772, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13326182961463928, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12241300195455551, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11757678538560867, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09828737378120422, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09223829209804535, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06800282746553421, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058675650507211685, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05504807084798813, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05416202172636986, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.034129682928323746, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028883090242743492, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028576118871569633, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02574612759053707, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0251601655036211, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018545014783740044, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018620485439896584, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0173953790217638, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013110396452248096, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11684632301330566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10930843651294708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10630206763744354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09623342752456665, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05452921614050865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05156392604112625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06206710636615753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05724290385842323, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05533529818058014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04899618402123451, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04655754938721657, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031532131135463715, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027379006147384644, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026134543120861053, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02583797462284565, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015768254175782204, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0134544987231493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013332373462617397, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012331178411841393, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012140974402427673, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008237584494054317, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00823634210973978, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007823738269507885, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005418418440967798, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09596070647239685, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08978234976530075, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08683889359235764, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07855447381734848, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04464113712310791, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.041997093707323074, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05132727697491646, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047556180506944656, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04530160874128342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040087420493364334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.038080845028162, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026018166914582253, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022682562470436096, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021385692059993744, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021065618842840195, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012989338487386703, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011016806587576866, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010872183367609978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010096962563693523, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009897136129438877, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006794979330152273, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006802938412874937, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006389160640537739, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004450168460607529, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23182986676692963, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2174079567193985, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21214909851551056, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1922662854194641, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10872547328472137, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10319525748491287, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12296591699123383, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11301950365304947, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11023440212011337, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09776096791028976, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.092812180519104, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06252028048038483, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05407474562525749, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05203050374984741, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05154235288500786, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031248409301042557, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026599569246172905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026407919824123383, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024357888847589493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024042770266532898, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01624555140733719, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015928542241454124, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01549533847719431, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010127299465239048, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21626484394073486, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1946057826280594, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18549761176109314, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1665373295545578, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10106955468654633, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09156332910060883, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11968416720628738, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10967233777046204, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10386214405298233, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08743248879909515, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08253213763237, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06162605807185173, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05303705111145973, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.049063682556152344, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04811282083392143, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03094906359910965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026094142347574234, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025725798681378365, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023452088236808777, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022846050560474396, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016744952648878098, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0173846073448658, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01546658854931593, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012667035683989525, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17230075597763062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16202977299690247, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15873531997203827, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14453107118606567, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08122635632753372, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0775468647480011, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09040118753910065, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08358000963926315, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08232660591602325, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07335133850574493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06986280530691147, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04619128629565239, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04017193987965584, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03911494091153145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038863129913806915, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023081902414560318, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020543528720736504, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020467232912778854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018996967002749443, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01884358562529087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012408711016178131, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013022888451814651, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01206534169614315, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009389311075210571, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23545534908771515, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2216431051492691, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2173195779323578, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19782733917236328, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11074647307395935, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10582610964775085, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12314736843109131, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1137232854962349, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11221564561128616, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10006952285766602, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09516194462776184, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0625673457980156, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05427291989326477, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05292702093720436, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052615806460380554, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031183622777462006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026997534558176994, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026905328035354614, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024797582998871803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024592017754912376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016235457733273506, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015934595838189125, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015771638602018356, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010069841518998146, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.23.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24596287310123444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22049297392368317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21042890846729279, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1861293613910675, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11335098743438721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10329516977071762, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13297490775585175, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12215383350849152, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1167622059583664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09725721180438995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0914369523525238, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06797628104686737, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05861511081457138, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05463317409157753, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.053660545498132706, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03425680845975876, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028792301192879677, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028451362624764442, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0256754532456398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02502826601266861, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018802093341946602, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018783999606966972, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01754862442612648, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013348972424864769, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12426898628473282, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11635202169418335, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11323105543851852, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10251405835151672, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.057988397777080536, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05492085963487625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06580434739589691, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.060696523636579514, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058846183121204376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052123576402664185, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.049588073045015335, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0334852859377861, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029048623517155647, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027819253504276276, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027526335790753365, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01675487868487835, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014394422993063927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014277440495789051, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013214249163866043, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0130294319242239, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008806007914245129, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008895231410861015, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008398199453949928, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005989441182464361, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10050904750823975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09404783695936203, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09113262593746185, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08245015144348145, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0467594638466835, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044056486338377, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05357569456100464, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.049645692110061646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04744085669517517, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.042022500187158585, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03991944342851639, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02715330570936203, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02368273213505745, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022399572655558586, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022081425413489342, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013567442074418068, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01153518632054329, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01139589212834835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0105757350102067, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010376178659498692, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0071024782955646515, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007107516750693321, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006699974648654461, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0046521322801709175, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2387118637561798, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22412529587745667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21894213557243347, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19861772656440735, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11200857162475586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10650678724050522, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12622778117656708, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11613806337118149, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11358195543289185, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10086461901664734, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09578998386859894, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06419817358255386, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055531859397888184, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05359061434864998, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053118377923965454, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032045330852270126, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027339845895767212, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027170732617378235, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02503635361790657, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024742551147937775, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01657361537218094, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016254721209406853, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015850184485316277, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010234787128865719, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2204928994178772, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20146799087524414, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1951780468225479, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1699543297290802, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10240384936332703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0952894389629364, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11735393851995468, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10653457045555115, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10451646149158478, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08781438320875168, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08062149584293365, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06028393283486366, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05205650255084038, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05020793154835701, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.049741923809051514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030724352225661278, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027582846581935883, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027465855702757835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024900566786527634, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02463201805949211, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017668206244707108, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.019086139276623726, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01713227853178978, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01520955003798008, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16948670148849487, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1593407392501831, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15610076487064362, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1420842707157135, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07985658943653107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07623893022537231, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08889736235141754, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08214852958917618, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0809427946805954, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07211442291736603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06868700683116913, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0454072542488575, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03946623578667641, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03841967135667801, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038174841552972794, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02268628403544426, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020122291520237923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020052703097462654, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018597478047013283, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01844499632716179, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012171872891485691, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012686103582382202, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011832901276648045, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00904967449605465, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23269370198249817, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21912327408790588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21481116116046906, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19558855891227722, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10945023596286774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10457707941532135, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1217212975025177, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11234210431575775, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11087142676115036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09885018318891525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0941036269068718, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06187356263399124, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05363242328166962, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05230671539902687, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.051993243396282196, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030875351279973984, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026680849492549896, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026585523039102554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024505509063601494, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024307234212756157, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01614750362932682, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01574435457587242, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015704156830906868, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00995374470949173, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.24.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23800285160541534, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21423359215259552, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2052026391029358, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1817731112241745, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10958516597747803, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1004030704498291, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12769122421741486, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11737123131752014, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11265154927968979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09442822635173798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08877354115247726, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.065205879509449, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056253399699926376, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05271288380026817, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05186750367283821, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03272748738527298, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027635477483272552, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027327371761202812, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02467164769768715, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024091878905892372, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01777557283639908, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017784355208277702, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.0166480652987957, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012443184852600098, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12057953327894211, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11296862363815308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10994920134544373, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09956754744052887, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05627927929162979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05326874554157257, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.063986636698246, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059033431112766266, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05708198994398117, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05063248053193092, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04820584878325462, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032553210854530334, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028216082602739334, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026953354477882385, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026652175933122635, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016267115250229836, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01387002132833004, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013751223683357239, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01273570116609335, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01254661101847887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008495906367897987, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008483919315040112, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008075213991105556, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005574704147875309, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09799930453300476, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09168560057878494, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08875268697738647, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08032522350549698, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04558911174535751, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04291977360844612, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05245357006788254, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.048557646572589874, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04625420272350311, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04098544269800186, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.038994867354631424, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02659057080745697, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02317364513874054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02183779701590538, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02151653729379177, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013284098356962204, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011258743703365326, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011114874854683876, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010333304293453693, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010130335576832294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006967275403439999, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006971550174057484, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006554423365741968, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0045889802277088165, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23479562997817993, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22054360806941986, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21550798416137695, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19565364718437195, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11015056073665619, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10482418537139893, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12405703961849213, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1140851154923439, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11168385297060013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09926115721464157, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09436216950416565, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06305403262376785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054508015513420105, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05269394814968109, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05225834622979164, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03147834911942482, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026866966858506203, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026701854541897774, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02461995929479599, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02433803677558899, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016267117112874985, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015929173678159714, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015570373274385929, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01000497117638588, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2143486738204956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19218041002750397, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.185064435005188, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15554659068584442, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10030634701251984, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09224335849285126, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11512085050344467, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10526732355356216, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1027088463306427, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08347326517105103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07488012313842773, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05935239419341087, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051159363240003586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04892513528466225, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04839935153722763, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02991625666618347, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026438796892762184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026277223601937294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023275159299373627, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02294222265481949, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016777578741312027, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01785867288708687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016111575067043304, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013751652091741562, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17379145324230194, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1635272204875946, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16019287705421448, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14587095379829407, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08190380036830902, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07820575684309006, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09109222888946533, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08419430255889893, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08298550546169281, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07398318499326706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07043389976024628, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04650827869772911, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.040394071489572525, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039360594004392624, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03911556676030159, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023250630125403404, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020521309226751328, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020450495183467865, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01895236223936081, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018801558762788773, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01248116698116064, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012805298902094364, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01214667595922947, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008995717391371727, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.235815167427063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22201550006866455, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2178037166595459, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1983174830675125, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1108459085226059, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1059783399105072, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12339530140161514, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.113798588514328, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11234042048454285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10023381561040878, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09537528455257416, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0626169890165329, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05432534217834473, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0530066043138504, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05269510671496391, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031310755759477615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027067137882113457, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026975717395544052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02487746998667717, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024677321314811707, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016486572101712227, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01600896380841732, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016053957864642143, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010189976543188095, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.25.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23925846815109253, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21501238644123077, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20564323663711548, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18210016191005707, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11003581434488297, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10058694332838058, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1283637136220932, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11825244128704071, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11327358335256577, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09476084262132645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0891570970416069, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06550679355859756, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056809958070516586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05303909629583359, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05209453031420708, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032863717526197433, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02792974002659321, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027606768533587456, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024971995502710342, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024368520826101303, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017555266618728638, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018187249079346657, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016296762973070145, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012929641641676426, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12073361873626709, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11327789723873138, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11023789644241333, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10004639625549316, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05640529841184616, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05343899130821228, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06409020721912384, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.059134066104888916, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057173047214746475, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.050847575068473816, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04848913103342056, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03258194401860237, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028288789093494415, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027030618861317635, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026731135323643684, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016300946474075317, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013940046541392803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013815036043524742, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012813960202038288, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0126266498118639, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008546615019440651, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008553367108106613, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008140123449265957, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005666111130267382, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10072771459817886, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09445057809352875, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09157535433769226, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08306828141212463, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04696156829595566, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044321950525045395, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05380697920918465, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0497698113322258, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04764300212264061, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04231696575880051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0403069332242012, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02726941555738449, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02375212498009205, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022486772388219833, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02218039520084858, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013625702820718288, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011559187434613705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011425947770476341, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010621326044201851, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010429079644382, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0071186707355082035, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00709299324080348, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006724305916577578, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004613068886101246, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2375001311302185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2232220619916916, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2184118777513504, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1984947770833969, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11146314442157745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10619007796049118, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12517277896404266, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11512476205825806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11297615617513657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10056427866220474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09570243209600449, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06362059712409973, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05498651787638664, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05329388752579689, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052874188870191574, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03174235671758652, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02708182856440544, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026933200657367706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024807708337903023, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024546869099140167, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01632324792444706, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01590515673160553, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015658169984817505, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00980931892991066, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2282325178384781, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.205235555768013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1976737380027771, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16651907563209534, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10630611330270767, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09827205538749695, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12201628088951111, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11211781203746796, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10941055417060852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08910169452428818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0802019014954567, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06292558461427689, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05448656901717186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05187153443694115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.051247015595436096, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03174576908349991, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02807784639298916, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027923962101340294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02485651522874832, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024463769048452377, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017802705988287926, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01907992921769619, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017021270468831062, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014728916808962822, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17888636887073517, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16833308339118958, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16500845551490784, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15020112693309784, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08426009118556976, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0804910957813263, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09355083107948303, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08657855540513992, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08537670969963074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07613116502761841, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0724370926618576, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0477200411260128, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04149698466062546, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04045688733458519, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.040212757885456085, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02380746230483055, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02102310210466385, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020948374643921852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019404146820306778, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019252603873610497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012633733451366425, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013017323799431324, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012289815582334995, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009031431749463081, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2403315305709839, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22636379301548004, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22197121381759644, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20223835110664368, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11304178833961487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1080746054649353, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12537936866283417, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11594909429550171, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11453134566545486, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1022191122174263, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09723055362701416, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0637190043926239, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05534014105796814, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05401775613427162, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05370640754699707, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0317387655377388, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027529099956154823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027436884120106697, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02529377117753029, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02509613335132599, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016468018293380737, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016214637085795403, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016018403694033623, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010215288028120995, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.26.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2410992980003357, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21656249463558197, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20663289725780487, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18314050137996674, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11079965531826019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10110712051391602, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12992706894874573, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11975795030593872, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11415846645832062, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0954851284623146, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08983808010816574, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06619061529636383, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.057336315512657166, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.053329531103372574, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05233665928244591, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03308185189962387, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027920378372073174, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027555570006370544, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0249174777418375, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024255814030766487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017649224027991295, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01800493709743023, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016337409615516663, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01249991450458765, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11854338645935059, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11114604771137238, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10806110501289368, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09807603806257248, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05538994073867798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05239064246416092, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06316288560628891, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05824294313788414, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05616134777665138, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049898259341716766, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04758384823799133, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03212711215019226, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027867071330547333, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026555603370070457, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0262474212795496, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016077743843197823, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01375051960349083, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013620360754430294, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012651773169636726, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012458659708499908, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008465844206511974, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008533492684364319, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008047974668443203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005746257491409779, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09972082823514938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0934286043047905, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09042937308549881, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08193416893482208, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04645209014415741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04372478276491165, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05348401516675949, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04952653869986534, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0471041239798069, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04180077090859413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03982636332511902, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027104033157229424, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023623976856470108, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022252438589930534, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021919574588537216, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01354331336915493, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011462184600532055, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01131153479218483, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01052536629140377, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010316550731658936, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007092590909451246, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00708278501406312, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006673780735582113, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004638818558305502, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23477748036384583, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22057023644447327, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21566806733608246, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1960138976573944, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1102079525589943, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10490506887435913, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12390176206827164, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11402375996112823, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11170021444559097, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09931579232215881, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09450975060462952, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06302640587091446, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054445598274469376, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0526835173368454, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.052270449697971344, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03142640367150307, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026823686435818672, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02667219191789627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024581220000982285, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02431657537817955, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01618899405002594, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015825318172574043, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015509359538555145, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009871508926153183, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2145296335220337, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18953843414783478, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1811748594045639, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15489691495895386, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09976297616958618, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09038574993610382, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11591368913650513, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10585102438926697, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10273219645023346, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08238671720027924, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07552538067102432, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059808455407619476, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05153270065784454, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04880782589316368, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04816972836852074, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03031313046813011, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0266619510948658, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026471426710486412, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023444410413503647, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023036282509565353, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017204347997903824, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018400801345705986, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01639755256474018, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014405501075088978, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1830311417579651, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1722795069217682, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16893735527992249, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15396185219287872, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0862378478050232, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08241763710975647, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09594082832336426, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08857174217700958, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08738989382982254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07797921448945999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07430027425289154, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.048877276480197906, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04248043894767761, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04143236577510834, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04117954149842262, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024450641125440598, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02159891277551651, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021525271236896515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019957901909947395, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019803298637270927, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013124859891831875, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013470537960529327, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01277847308665514, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009462746791541576, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23999516665935516, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22607870399951935, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22174140810966492, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2021178901195526, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11290611326694489, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10794907808303833, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1253904551267624, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11578711122274399, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11438600718975067, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10212180018424988, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09720510989427567, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06371430307626724, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.055315274745225906, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05399419367313385, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053679075092077255, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031836628913879395, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027579443529248238, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027495205402374268, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025369461625814438, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025171823799610138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016718747094273567, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016330542042851448, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016284316778182983, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010434351861476898, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.27.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23875245451927185, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2134810984134674, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20307083427906036, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18001103401184082, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10957182198762894, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09947974234819412, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12927183508872986, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11911063641309738, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11302965879440308, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0941079631447792, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08866098523139954, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06586641818284988, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056971754878759384, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05267483368515968, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05162113159894943, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03290413320064545, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027478637173771858, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027089405804872513, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024431413039565086, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023729488253593445, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017542054876685143, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01764605939388275, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016157228499650955, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012066314928233624, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11495976895093918, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10773743689060211, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10471966117620468, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09498650580644608, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.053591158241033554, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05068212375044823, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.061194177716970444, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.056486718356609344, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05437116324901581, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04825759679079056, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04602079093456268, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031075622886419296, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026958821341395378, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025660812854766846, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02535407803952694, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015531929209828377, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013195278123021126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013065540231764317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012114637531340122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011919134296476841, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008135474286973476, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008066353388130665, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0077117192558944225, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005251497030258179, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09732312709093094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09111630916595459, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08814004808664322, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07991158217191696, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0452868789434433, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.042615246027708054, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.052268996834754944, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04834432527422905, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04595516622066498, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04077397659420967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03886617347598076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02649092487990856, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02308344468474388, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02169722318649292, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021362369880080223, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013233723118901253, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01117587462067604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011022517457604408, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010263307020068169, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010053934529423714, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006937794853001833, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006915709003806114, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006510166451334953, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004525788128376007, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23094309866428375, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2168632596731186, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2118283063173294, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19247856736183167, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10817687213420868, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10281678289175034, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12192363291978836, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1123017817735672, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10966812819242477, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09748554974794388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09280882775783539, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06197075545787811, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05359312891960144, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05170183628797531, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0512554794549942, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0309082493185997, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02632751688361168, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026159008964896202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02412399835884571, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023835329338908195, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015945347025990486, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01556815579533577, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01524794939905405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009691471233963966, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21796657145023346, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19152453541755676, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18209147453308105, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1533859223127365, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10163487493991852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09121421724557877, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11833737790584564, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10873613506555557, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10484261065721512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08276834338903427, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07470610737800598, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06131846457719803, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05285286903381348, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04961687698960304, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04883499816060066, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03101550228893757, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026991190388798714, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02674243040382862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02349037677049637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023000093176960945, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017516404390335083, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01856902241706848, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016519159078598022, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014367872849106789, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18340644240379333, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17264175415039062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1692391335964203, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15418480336666107, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08627734333276749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0824810266494751, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09591314196586609, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08862181007862091, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0874602347612381, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07804782688617706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0742754265666008, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04888240247964859, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04247076064348221, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04140956327319145, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.041155822575092316, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024405421689152718, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02149450033903122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021420473232865334, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019842080771923065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0196850448846817, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012991226278245449, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013266289606690407, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01264442503452301, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00916240829974413, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23740041255950928, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22363042831420898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2193322628736496, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19977237284183502, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11154814064502716, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10667374730110168, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1238379180431366, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11441639065742493, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1130133718252182, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10090715438127518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09597859531641006, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06288450211286545, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05462460219860077, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05332067236304283, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053004875779151917, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031356554478406906, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02720349282026291, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02711717039346695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02501746639609337, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024824418127536774, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016321882605552673, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016065528616309166, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015880491584539413, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01018491666764021, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.28.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2432863712310791, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21716441214084625, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20665623247623444, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18332712352275848, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11171425133943558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10139144957065582, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13232648372650146, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12113489210605621, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11529503017663956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09582400321960449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09040658175945282, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0673045665025711, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058080870658159256, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05386103689670563, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05282112583518028, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.033746734261512756, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02836540900170803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027997462078928947, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02527703531086445, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02459762617945671, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018253564834594727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018529243767261505, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016870906576514244, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013116100803017616, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11590466648340225, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10863487422466278, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10559136420488358, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09572408348321915, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05406484752893448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05112998187541962, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06175612285733223, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.056945689022541046, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05483120307326317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.048689719289541245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046463124454021454, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03138503432273865, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027200384065508842, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02591097727417946, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025599418208003044, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01571379229426384, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013352730311453342, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013216337189078331, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012267078272998333, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012066383846104145, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008248948492109776, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008202597498893738, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007832557894289494, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005406780634075403, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09734784066677094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0911780297756195, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08804380893707275, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07973544299602509, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04525227099657059, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04249780625104904, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05248850956559181, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04856457561254501, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04590481519699097, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.040758393704891205, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03884970396757126, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026591036468744278, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0231810100376606, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021696725860238075, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021334471181035042, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013308640569448471, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011212256737053394, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011042117141187191, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01030981820076704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010082174092531204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007019297685474157, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0070057702250778675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006566791329532862, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0046366299502551556, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2385544329881668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2241666615009308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21921904385089874, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19924789667129517, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11188284307718277, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10652623325586319, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1258118897676468, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11570849269628525, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11342816054821014, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.100849449634552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09596749395132065, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06390403211116791, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05522790923714638, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05346621945500374, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05304126814007759, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031871095299720764, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027179326862096786, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027031036093831062, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024898292496800423, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024630412459373474, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01641087979078293, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015981227159500122, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015736844390630722, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009883753024041653, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20281238853931427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1785435527563095, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17086219787597656, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14051730930805206, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09407086670398712, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08541490882635117, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1080298125743866, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09911404550075531, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09681334346532822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0761854350566864, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06809896230697632, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05561196058988571, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04831261560320854, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04603445529937744, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.045487210154533386, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028183238580822945, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025278441607952118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0251450315117836, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022031674161553383, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021680500358343124, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016097556799650192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017557883635163307, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01543137151747942, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013928011059761047, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.17544673383235931, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16523697972297668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.16201426088809967, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.147681325674057, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08254372328519821, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07888464629650116, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09158571809530258, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0847323089838028, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.083621546626091, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07465791702270508, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07107241451740265, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04667358100414276, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04055995121598244, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.039566751569509506, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.039330266416072845, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02327965945005417, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020487947389483452, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02041613683104515, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.018905283883213997, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01875491999089718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012326190248131752, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012564477510750294, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01199925597757101, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008582874201238155, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2294054925441742, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21620209515094757, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2120646834373474, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19337420165538788, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10776875168085098, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10304993391036987, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11947333812713623, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11052659898996353, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10917114466428757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09754280000925064, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09284718334674835, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06071528419852257, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05273548141121864, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.051486536860466, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05118848755955696, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030233558267354965, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02620142139494419, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026118097826838493, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02409142628312111, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02390388399362564, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015671463683247566, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015376286581158638, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015251324512064457, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009621160104870796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.29.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23088763654232025, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20590180158615112, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19561919569969177, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17360511422157288, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10570743680000305, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09575074166059494, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12537778913974762, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11527297645807266, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10916074365377426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09076384454965591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08581406623125076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06392522156238556, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.055179987102746964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05089837312698364, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04985330253839493, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03204075247049332, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026727207005023956, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026340901851654053, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023812325671315193, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023117322474718094, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017347974702715874, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01740027777850628, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015967825427651405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012164164334535599, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12038952112197876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11309462785720825, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11019953340291977, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1001964807510376, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05627739429473877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05339827388525009, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06367030739784241, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.058922700583934784, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.057029299437999725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05080864205956459, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048401180654764175, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03228672593832016, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028112616389989853, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026920977979898453, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0266405139118433, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016155095770955086, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013780005276203156, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013658725656569004, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012665349058806896, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012483584694564342, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008419470861554146, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008292878046631813, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008036251179873943, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005294363014400005, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10527609288692474, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09885846078395844, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09612671285867691, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08737197518348694, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04915953800082207, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04655041918158531, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05609113723039627, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.051802944391965866, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04983833059668541, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04442248493432999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.042355459183454514, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028449544683098793, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024713391438126564, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023518070578575134, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023231932893395424, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014202296733856201, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012043883092701435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011919030919671059, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011073182336986065, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010891520418226719, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007393438369035721, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0072906604036688805, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0070111751556396484, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004651318769901991, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23866136372089386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22460678219795227, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21999436616897583, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20033103227615356, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11205917596817017, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10694009065628052, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12532657384872437, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11550503224134445, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11354987323284149, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10129879415035248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09647561609745026, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0636754110455513, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.055143196135759354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053540684282779694, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05316949263215065, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031754836440086365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027196507900953293, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02707652561366558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024967852979898453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024722732603549957, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01635202206671238, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015930501744151115, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015761250630021095, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009815637022256851, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2217235416173935, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1884707808494568, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17729121446609497, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13944214582443237, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1021413803100586, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08960448950529099, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11995195597410202, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11010172218084335, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10637369751930237, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07772704213857651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07021549344062805, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.061698589473962784, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05322613939642906, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04959928244352341, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.048712506890296936, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031039170920848846, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026556387543678284, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026323748752474785, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021956708282232285, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021354850381612778, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017126446589827538, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017844030633568764, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015997309237718582, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013263336382806301, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18146787583827972, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17098304629325867, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1676655411720276, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1529296636581421, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08537089824676514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08163828402757645, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09468824416399002, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0876140221953392, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08650510758161545, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07726559042930603, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07363731414079666, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04823734983801842, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04193442314863205, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04091855138540268, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04067787900567055, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024058016017079353, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02116354927420616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021095309406518936, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019533153623342514, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01938398741185665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012709085829555988, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012954050675034523, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012371839955449104, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008811136707663536, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23435239493846893, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22093069553375244, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.216779425740242, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19776804745197296, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1100626066327095, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10532534867525101, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1222631111741066, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11286461353302002, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11151473224163055, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09967146813869476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09505045413970947, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062141843140125275, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053871456533670425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05260750278830528, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05231000855565071, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031012535095214844, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026842867955565453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026757320389151573, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024702206254005432, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024513600394129753, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016239171847701073, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0158526748418808, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01582564227283001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010074091143906116, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.30.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23447510600090027, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20960330963134766, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1989629864692688, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1768406480550766, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1075381487607956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09738204628229141, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1279970109462738, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11760418117046356, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11103711277246475, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0925990492105484, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08754283934831619, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0650458037853241, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056399427354335785, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05183267220854759, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05070940777659416, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03253332898020744, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02727472595870495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026847533881664276, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0243685320019722, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023623790591955185, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017393194139003754, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017865199595689774, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015917865559458733, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012529420666396618, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11687280982732773, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10965948551893234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10670950263738632, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09685081243515015, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.054585326462984085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05170634388923645, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06200191378593445, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.057313740253448486, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05535195395350456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04921082407236099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04688192531466484, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031474519520998, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02739276923239231, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026136158034205437, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025835655629634857, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015744397416710854, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013412569649517536, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013287921436131, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01232062466442585, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012129462324082851, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00822508241981268, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008148583583533764, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007819042541086674, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005273486953228712, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09944367408752441, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09329775720834732, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09044446051120758, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08208825439214706, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04638596996665001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04378128424286842, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.053402360528707504, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04928999021649361, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04707523062825203, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04185852035880089, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03993367403745651, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027063172310590744, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02352740988135338, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022224804386496544, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02190948650240898, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013527764938771725, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011414875276386738, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011270885355770588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010488380677998066, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010289772413671017, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007074160501360893, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006992587819695473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006662669591605663, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00451836921274662, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23826929926872253, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2239656299352646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21918830275535583, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1992877870798111, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11179222166538239, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10656946152448654, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12525956332683563, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11536194384098053, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11333948373794556, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10086659342050552, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09606899321079254, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0636451318860054, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05505960062146187, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05342491343617439, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053035885095596313, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03174291551113129, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027155118063092232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02702724188566208, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024894677102565765, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024646811187267303, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01631750352680683, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015927966684103012, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015684152022004128, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009840811602771282, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2078845351934433, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18141336739063263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17336197197437286, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14871983230113983, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09668892621994019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08685506135225296, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11056742817163467, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10170873999595642, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09963838011026382, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07859783619642258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07166485488414764, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05695129558444023, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04916505515575409, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0468885712325573, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046356819570064545, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028605088591575623, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025041848421096802, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024913650006055832, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021587375551462173, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02123824879527092, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0156855508685112, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016564499586820602, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014980263076722622, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012369972653687, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1873144507408142, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17648598551750183, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17310599982738495, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15781724452972412, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08815624564886093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08431322872638702, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09777364879846573, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09042441844940186, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08930953592061996, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07974322140216827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07598783820867538, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04985031858086586, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0432816706597805, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04225262999534607, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04200764745473862, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.024845607578754425, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0218343622982502, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021759964525699615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020141445100307465, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019988149404525757, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013139362446963787, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.013326028361916542, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012801449745893478, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009028883650898933, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23672367632389069, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2231263816356659, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21890707314014435, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.199712336063385, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11124496906995773, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10641871392726898, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12328231334686279, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11403462290763855, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11269909143447876, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10069506615400314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09588100016117096, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06266557425260544, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054420772939920425, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05315432325005531, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052855681627988815, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031218260526657104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027051236480474472, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026966314762830734, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02486802265048027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024673422798514366, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016217360273003578, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015871062874794006, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015792831778526306, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009926225058734417, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.31.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23700153827667236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21225911378860474, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20192617177963257, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1795966476202011, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10868167877197266, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09873518347740173, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1289607137441635, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11846615374088287, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11217532306909561, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.0938563421368599, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0885980874300003, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06561072170734406, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056714385747909546, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.052277762442827225, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.051194384694099426, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03305377438664436, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02735387161374092, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0269462987780571, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024447189643979073, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023720528930425644, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018102210015058517, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017687905579805374, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016729336231946945, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012186223641037941, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13665571808815002, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12843206524848938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12509995698928833, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11377952247858047, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06403326988220215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06073305755853653, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07276072353124619, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06705880910158157, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06490689516067505, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05782417207956314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05522528663277626, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.036985382437705994, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03209938481450081, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030695756897330284, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030373763293027878, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01850823499262333, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01580006815493107, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0156649611890316, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014546109363436699, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014334253035485744, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009673289954662323, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009650110267102718, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00921605434268713, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006345939356833696, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11114402115345001, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10440938174724579, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10116884857416153, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09197971969842911, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.051972709596157074, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04903996363282204, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05960590019822121, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05522501468658447, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.052693042904138565, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04692014679312706, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0447479747235775, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03023754060268402, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026386315003037453, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02492176927626133, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02456442452967167, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015131987631320953, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012851802632212639, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012693993747234344, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011835211887955666, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011614581570029259, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007927926257252693, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007955651730298996, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007476081606000662, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005253226961940527, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23736220598220825, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22346429526805878, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21862879395484924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1990656703710556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11149514466524124, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10622923076152802, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12535624206066132, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11528732627630234, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1129598468542099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10076533257961273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09602051228284836, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06380219757556915, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0550922267138958, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0533033087849617, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05287947505712509, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03182306885719299, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027163458988070488, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027002885937690735, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024949118494987488, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02467697486281395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016449954360723495, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016072910279035568, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015753306448459625, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010081456042826176, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10328823328018188, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0973363071680069, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09546984732151031, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08634241670370102, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04922064393758774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.047041766345500946, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.055002082139253616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05042492598295212, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04984147474169731, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04428715258836746, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04178687930107117, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028469696640968323, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024457084015011787, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02390306442975998, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02377401851117611, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01428785640746355, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012720366008579731, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012685496360063553, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01175950188189745, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01167298574000597, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007867660373449326, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008274450898170471, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007703773677349091, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.006205773446708918, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15920592844486237, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14996054768562317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14703033864498138, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13409097492694855, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07495555281639099, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07161635160446167, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0834415853023529, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07705149799585342, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07595913112163544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06785787642002106, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06466781347990036, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04259183257818222, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0370069220662117, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.036073874682188034, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03586291894316673, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021389614790678024, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018858743831515312, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.01879793033003807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017453117296099663, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017315618693828583, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011562321335077286, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011831831187009811, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01126659382134676, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008400717750191689, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2233283519744873, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2104923576116562, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20650863647460938, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1884099841117859, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10486728698015213, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10031219571828842, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11659902334213257, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10761646181344986, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10623160749673843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09495341777801514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0904734879732132, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05921883508563042, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05140598863363266, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05017105117440224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04987774416804314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02967594750225544, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025639479979872704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025551361963152885, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023597681894898415, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023407956585288048, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01564120687544346, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015190787613391876, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015229405835270882, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009720385074615479, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.32.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21373267471790314, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19175201654434204, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.182110995054245, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16240185499191284, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09814601391553879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08899135887622833, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11651232838630676, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10744631290435791, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10124508291482925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08499155938625336, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08047446608543396, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05951450765132904, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05186649039387703, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04754369333386421, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04644587263464928, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030033688992261887, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02538859099149704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.024972615763545036, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022877324372529984, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022190725430846214, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016204996034502983, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017107289284467697, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014802664518356323, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.0125378193333745, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1198103129863739, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11281478404998779, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10999882221221924, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10018087923526764, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05612865090370178, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05334993824362755, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06365018337965012, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05870960280299187, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.056884367018938065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05082417279481888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0485871396958828, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.032355815172195435, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028095347806811333, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026911336928606033, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026626253500580788, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01619238592684269, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013866564258933067, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013751484453678131, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012791593559086323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012617015279829502, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008494159206748009, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00849165115505457, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00811681803315878, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005625828634947538, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0998220145702362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09387336671352386, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09124070405960083, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08302304148674011, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0466565378010273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044167667627334595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.053252872079610825, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0492764487862587, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047277405858039856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04220687597990036, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04030904546380043, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02701883390545845, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023533497005701065, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0223468579351902, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022057993337512016, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013515849597752094, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011489230208098888, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011365462094545364, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01058525312691927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010408338159322739, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007061016745865345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007030312903225422, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006689933594316244, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00457767816260457, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2324327826499939, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21902254223823547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2144845426082611, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19561733305454254, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1090703010559082, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10412333160638809, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12218303978443146, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11249804496765137, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1105131059885025, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09875331819057465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09420240670442581, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.062092363834381104, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053713876754045486, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05212092399597168, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05173225700855255, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0309743694961071, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026481332257390022, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02634919248521328, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02434718795120716, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024103764444589615, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015948545187711716, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015533377416431904, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015329591929912567, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00958213023841381, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1591518670320511, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14819416403770447, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1446787416934967, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1281939446926117, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07511580735445023, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07108611613512039, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08418208360671997, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07736340165138245, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.07627902179956436, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06622125953435898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06111305579543114, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.043490998446941376, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03769085556268692, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03667174279689789, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.036414895206689835, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.022008368745446205, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01990102231502533, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.019835704937577248, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018236877396702766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0180862694978714, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012502859346568584, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013467316515743732, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012195229530334473, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010566766373813152, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15065155923366547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14204639196395874, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1393108367919922, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12707734107971191, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07088639587163925, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06780347228050232, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07863586395978928, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07273822277784348, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07178468257188797, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06424902379512787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.061252396553754807, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04012903571128845, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0348971001803875, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0340481698513031, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0338500440120697, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020067960023880005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017717892304062843, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017659924924373627, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016392622143030167, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0162678062915802, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010671750642359257, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.010990720242261887, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010393035598099232, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007669579703360796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22025363147258759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2077527791261673, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20393970608711243, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18613237142562866, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10343515872955322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09902192652225494, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11452493071556091, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10605877637863159, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10477215051651001, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09375873953104019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08932969719171524, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05821841582655907, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.050630152225494385, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0494479238986969, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04916677623987198, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029050568118691444, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025199145078659058, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025114791467785835, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023205047473311424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0230222400277853, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015055648982524872, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01482993084937334, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014654344879090786, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009348954074084759, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.33.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21442671120166779, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19292393326759338, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18338274955749512, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16399253904819489, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09830611199140549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08936523646116257, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11698537319898605, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10773099213838577, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10141924768686295, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08557690680027008, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08111370354890823, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05957193300127983, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05171963572502136, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04739503934979439, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.046324584633111954, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.02994086593389511, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024975983425974846, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.024567311629652977, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022495560348033905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021811602637171745, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01617436297237873, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01642027124762535, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01482469029724598, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011584047228097916, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10900553315877914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10258994996547699, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09996438026428223, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09104771912097931, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05098741129040718, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04839511588215828, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05795793607831001, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05344998091459274, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.051654599606990814, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04617619141936302, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04418138414621353, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029498854652047157, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025595758110284805, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024464963003993034, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02419242635369301, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014774234034121037, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012645990587770939, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01253248006105423, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011683420278131962, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011514075100421906, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007766990922391415, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007816246710717678, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007410775404423475, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0052579110488295555, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09169816225767136, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08625198155641556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08369981497526169, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0761587917804718, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.042785219848155975, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.040438681840896606, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04908103868365288, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04543573036789894, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04336279630661011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03872225806117058, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03698490187525749, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02485745958983898, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02168867364525795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020503664389252663, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02021731063723564, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012439948506653309, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010560039430856705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01042851060628891, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009737683460116386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009556873701512814, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00651451013982296, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006504269316792488, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006155254319310188, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004261849448084831, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23048441112041473, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.217304065823555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2128143310546875, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19403760135173798, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1081913560628891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10323113203048706, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12122604995965958, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11168328672647476, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1095733568072319, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09796083718538284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0934513658285141, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0616673082113266, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.053300924599170685, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05169452354311943, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05132005736231804, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03075128234922886, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026315396651625633, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026175832375884056, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024207614362239838, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023957176133990288, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01584559865295887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015498040243983269, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015220622532069683, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009664698503911495, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17618484795093536, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.15332075953483582, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.14590990543365479, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.12419767677783966, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.08215523511171341, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07362020760774612, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09471555799245834, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.08680564910173416, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08455713838338852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06590385735034943, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06041483208537102, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.04905835539102554, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.042304400354623795, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04018518328666687, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.039690952748060226, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.024870315566658974, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.021939756348729134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021796947345137596, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.018924003466963768, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018610535189509392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01413056068122387, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015096488408744335, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013500932604074478, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011831769719719887, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15113221108913422, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1425958126783371, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.13992078602313995, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1277872920036316, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.071086086332798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06804049015045166, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07895343005657196, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07290706783533096, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07198090106248856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06450730562210083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06156375631690025, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04022449627518654, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0349937379360199, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.0341615229845047, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03396420180797577, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020174982026219368, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017823779955506325, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017770767211914062, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016520900651812553, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01640479266643524, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010851245373487473, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011124659329652786, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010578494518995285, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00785640999674797, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2194247990846634, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20721183717250824, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20339709520339966, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18577304482460022, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10305868834257126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09870941936969757, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1142066940665245, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10562364012002945, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10434997826814651, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09355396777391434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08915319293737411, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05801311507821083, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05046802759170532, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.049304623156785965, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04903580620884895, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029044678434729576, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02518538013100624, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025114474818110466, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023234695196151733, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023068299517035484, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01524007972329855, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014911781065165997, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014862312003970146, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00955671165138483, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.34.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21244622766971588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19066424667835236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18070632219314575, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1614786833524704, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09738757461309433, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08815091848373413, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11646547168493271, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1073133572936058, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10052094608545303, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08442910760641098, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08011892437934875, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05930839851498604, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05139355733990669, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04690694063901901, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04580269381403923, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.02972324937582016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.024633007124066353, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.024193361401557922, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02209915593266487, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021377483382821083, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01592620275914669, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01612604409456253, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.014502446167171001, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.0112101836130023, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10090436041355133, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09494112432003021, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0924382209777832, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08419501036405563, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04707852378487587, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04466209560632706, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.053540416061878204, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.049510687589645386, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047704651951789856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.042652424424886703, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04077259823679924, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02717999368906021, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0236516110599041, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02255156636238098, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022286491468548775, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013595666736364365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011610536836087704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01149743888527155, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01071296539157629, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010541361756622791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007130119949579239, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007107110694050789, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0067794607020914555, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00466986745595932, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08540399372577667, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08032497018575668, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0778772309422493, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0708470344543457, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03978510573506355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03756451606750488, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.045743390917778015, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04231608659029007, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04031716659665108, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03600572049617767, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03443235158920288, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02317199297249317, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020206868648529053, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019056815654039383, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018783578649163246, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01159032341092825, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009813150390982628, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009686697274446487, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009053399786353111, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008877155371010303, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006076441146433353, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006058008410036564, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005724070593714714, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003965094219893217, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22565776109695435, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21265862882137299, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2079901397228241, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.18957799673080444, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10558365285396576, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1006842777132988, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11873222887516022, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10931392759084702, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10694941878318787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0956321433186531, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09126396477222443, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06031354144215584, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05216565355658531, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05044449120759964, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0500197559595108, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030082613229751587, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025683393701910973, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025531137362122536, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02362927794456482, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023362739011645317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015549306757748127, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015165598131716251, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014897692017257214, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009458387270569801, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19578911364078522, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1710730493068695, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16232380270957947, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13287633657455444, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09080909937620163, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08151279389858246, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1063813865184784, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0977708026766777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09427060186862946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0728701651096344, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06547535955905914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.055156443268060684, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04759536683559418, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.044432658702135086, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04368435591459274, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02794833295047283, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024270527064800262, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024051200598478317, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020962445065379143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020461732521653175, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015792373567819595, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01684724912047386, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014823852106928825, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013124566525220871, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15307623147964478, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1444634199142456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1417706161737442, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12950032949447632, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07194271683692932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06889217346906662, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07986254245042801, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07378335297107697, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07282806932926178, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06530875712633133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.062354400753974915, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0406608060002327, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03537030518054962, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03453227877616882, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.034335918724536896, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02037501335144043, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017955578863620758, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017898162826895714, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016632338985800743, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.01650826260447502, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010888222604990005, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01111480500549078, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010613122954964638, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007735955063253641, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2177962362766266, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2056911140680313, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20201288163661957, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1846403181552887, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10217519104480743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09793210029602051, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11333854496479034, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1046893522143364, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10346747934818268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09282311797142029, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08865402638912201, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05748763680458069, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05002189800143242, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04887670651078224, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.048603206872940063, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028752701357007027, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024984611198306084, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024905232712626457, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023063482716679573, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022895894944667816, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015012132935225964, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01481105387210846, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014634235762059689, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009505695663392544, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.35.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22217684984207153, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19897542893886566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.1890689730644226, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16905800998210907, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1018812283873558, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09237010776996613, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1217009648680687, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11135147511959076, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10518746078014374, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08815906196832657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08363064378499985, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.061952002346515656, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05345404893159866, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04918207600712776, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.048127129673957825, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031153393909335136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026000715792179108, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025610463693737984, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023351378738880157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02267322689294815, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016956448554992676, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017171457409858704, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01557154767215252, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012264908291399479, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09900974482297897, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09313999861478806, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09063895046710968, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08252716809511185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.046174898743629456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04379934445023537, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05258246883749962, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.048537615686655045, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04678279906511307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04181507229804993, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.040014781057834625, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02672099694609642, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02320644073188305, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02213173545897007, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021878011524677277, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0133747523650527, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011420092545449734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01131086703389883, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010545344091951847, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010385224595665932, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0070295934565365314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007031977642327547, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006694887764751911, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004676471929997206, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08320946991443634, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.07826939970254898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0756862610578537, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.06887104362249374, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03870880976319313, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03648882359266281, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.044740788638591766, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.041415635496377945, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.039237841963768005, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03505716100335121, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.033521343022584915, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.022664422169327736, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.019772956147789955, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.018560638651251793, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01827141083776951, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011354003101587296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009592770598828793, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009457550942897797, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.008857909590005875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008675504475831985, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.005991565529257059, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005984873976558447, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0056261480785906315, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003969712182879448, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23179347813129425, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.21846608817577362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2139802873134613, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19512800872325897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10858334600925446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10372455418109894, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1216273158788681, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11200723797082901, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10996939986944199, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09838616102933884, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09384132921695709, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06181650608778, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05345424264669418, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05185701698064804, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05147653445601463, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030827617272734642, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026351789012551308, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026216639205813408, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02424049563705921, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02399509958922863, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015884429216384888, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015463425777852535, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01526169665157795, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009545980952680111, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.19682909548282623, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16717395186424255, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1569090038537979, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1309133768081665, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09090682119131088, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07985932379961014, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10668674856424332, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09800205379724503, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09385485202074051, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07123950123786926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06458593904972076, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.055032841861248016, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04799262806773186, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04483746364712715, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.044082313776016235, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.027979549020528793, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025023140013217926, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024791333824396133, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021551959216594696, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021073708310723305, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016061389818787575, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01796529069542885, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0151494350284338, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014519823715090752, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.15115264058113098, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14266438782215118, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14001064002513885, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12794090807437897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07100974023342133, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.0679905116558075, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07865779846906662, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0727892592549324, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07189355045557022, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06445827335119247, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06148434057831764, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04008150100708008, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.034861087799072266, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03404323011636734, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03385832905769348, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020031752064824104, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.01763308234512806, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0175775159150362, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01631847396492958, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016199225559830666, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010605272836983204, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.010811053216457367, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010344631038606167, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007408714387565851, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2145947813987732, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2026716023683548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1989881843328476, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18191266059875488, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10066427290439606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09648535400629044, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1113942414522171, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1031150072813034, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10191508382558823, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09144887328147888, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08718127012252808, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05655727535486221, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04920443892478943, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04809369891881943, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.047821056097745895, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028224743902683258, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024479180574417114, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02440653368830681, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022578686475753784, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022408377379179, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014605512842535973, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014356404542922974, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0142319081351161, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008998575620353222, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.36.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.21534821391105652, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19276221096515656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18260377645492554, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16351057589054108, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.0986674576997757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08924464881420135, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11851682513952255, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10860169678926468, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10189569741487503, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08545047044754028, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08132617175579071, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0603288933634758, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05211595818400383, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.047650884836912155, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04654235020279884, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030361898243427277, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02522219344973564, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02479974739253521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.022663163021206856, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.021959183737635612, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016560493037104607, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01674564927816391, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015135912224650383, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011971778236329556, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10236062854528427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0964227169752121, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09404993057250977, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08577993512153625, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04778098315000534, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04546515271067619, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05393492802977562, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.049931563436985016, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.048422448337078094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04333946108818054, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04136406630277634, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027346394956111908, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023834330961108208, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022860098630189896, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022626740857958794, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01368025504052639, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01170317642390728, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011603863909840584, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010793826542794704, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010646357201039791, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007138849701732397, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007050505839288235, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006823316216468811, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004523288458585739, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08928242325782776, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08406350016593933, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08183378726243973, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07462088763713837, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04164167121052742, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.039546165615320206, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04742554947733879, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04381803423166275, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04219934716820717, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03778664022684097, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03613878786563873, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02405000478029251, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020894650369882584, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019930072128772736, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019696123898029327, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012013129889965057, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010206741280853748, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010104876942932606, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009416752494871616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009269075468182564, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0062560345977544785, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006172016263008118, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005945094861090183, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003945866134017706, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23499424755573273, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22168946266174316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2173856645822525, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19854213297367096, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11016340553760529, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10533784329891205, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12296002358198166, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11338463425636292, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11153487116098404, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09996698796749115, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0954727903008461, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06242777034640312, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05409998446702957, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0526202991604805, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05225635692477226, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03112630918622017, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026710880920290947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02659516967833042, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024600451812148094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024385273456573486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01602848619222641, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015612068586051464, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015470599755644798, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009619579650461674, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21260690689086914, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1740071177482605, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15867899358272552, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13047434389591217, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0963931754231453, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08094596862792969, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11909878253936768, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10914573818445206, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1023034006357193, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07298506051301956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06751769781112671, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06154748052358627, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05291636288166046, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0471126027405262, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04565805196762085, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031000666320323944, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025641169399023056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025201329961419106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021331608295440674, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020357053726911545, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01726384647190571, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018059592694044113, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015489366836845875, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013655121438205242, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.162981778383255, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.15385526418685913, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15101683139801025, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1379590779542923, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07652842253446579, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07331481575965881, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08483340591192245, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07844259589910507, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07750292867422104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06950490176677704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.066305972635746, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04316707327961922, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03755393996834755, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03668826073408127, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.036485981196165085, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.021575603634119034, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0189767275005579, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018918976187705994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.017565030604600906, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.017437413334846497, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011394668370485306, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.011599056422710419, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011115201748907566, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007901200093328953, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.22390085458755493, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21153827011585236, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2077597677707672, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18996615707874298, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.105043426156044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10067449510097504, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11647546291351318, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10757461935281754, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10633164644241333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0954643040895462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09120884537696838, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05919905751943588, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05136260762810707, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05021824315190315, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.049946267157793045, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.029602419584989548, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025635860860347748, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02555771917104721, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02366515062749386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02349632978439331, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015509453602135181, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015149264596402645, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015131091699004173, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009665415622293949, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.37.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22163891792297363, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.19855758547782898, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.18796154856681824, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.16825832426548004, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10151153802871704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0917971283197403, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12214366346597672, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1121317595243454, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10490139573812485, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08807454258203506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08366493880748749, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06197772175073624, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.053777486085891724, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04900555685162544, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04782002046704292, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.031092653051018715, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.025886647403240204, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02542017214000225, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023278512060642242, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022506294772028923, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.016701344400644302, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017146006226539612, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015164673328399658, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012130065821111202, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10133776813745499, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0953206717967987, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09289447218179703, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08461084216833115, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.047288645058870316, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.044888660311698914, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05357000231742859, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.049558110535144806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.047922056168317795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04281327500939369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04086484760046005, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02716798521578312, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.023668212816119194, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.022621046751737595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02237803302705288, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013588490895926952, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011604253202676773, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011502839624881744, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0106936264783144, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010540126822888851, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007104312069714069, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0070338500663638115, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006765636149793863, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004550296347588301, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08541889488697052, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08032655715942383, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.07797697931528091, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07104462385177612, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.03982274979352951, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03765492141246796, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04570913687348366, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0422111339867115, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0403640940785408, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03607293590903282, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.034480296075344086, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02315283752977848, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020153069868683815, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019068120047450066, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.018809549510478973, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.011577821336686611, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.009791123680770397, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.009672826156020164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009028729051351547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.008866656571626663, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006051508244127035, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.005988131742924452, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005707053933292627, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003866885555908084, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2342604547739029, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22076518833637238, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2162778675556183, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19727708399295807, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10977243632078171, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10487876087427139, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12268231064081192, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11315245926380157, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11120514571666718, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09945571422576904, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09486941248178482, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06231377273797989, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05395694822072983, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.052419427782297134, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05205697938799858, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03105679340660572, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026634927839040756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026511795818805695, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02450411021709442, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024269763380289078, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015980541706085205, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015608451329171658, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015390879474580288, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00966130755841732, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2001163810491562, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1676536649465561, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15686380863189697, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13366447389125824, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0921986922621727, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07887222617864609, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10791656374931335, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0991855189204216, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09617559611797333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07220915704965591, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06586793065071106, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05562681704759598, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04808652400970459, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.044901032000780106, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04413154348731041, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.028025921434164047, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024142591282725334, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02395494468510151, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020374441519379616, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01986328698694706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015444459393620491, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01632850430905819, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01445237547159195, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012300606817007065, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1725527048110962, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1628851741552353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15990513563156128, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.14613205194473267, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08104629814624786, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07766146212816238, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08972368389368057, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08305143564939499, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0820634663105011, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07361432909965515, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07019443064928055, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04569119215011597, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03974374383687973, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.038841694593429565, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.038629114627838135, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.022824328392744064, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020055031403899193, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.019997643306851387, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.01855285093188286, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.018419155851006508, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012035692110657692, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012205073609948158, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011740431189537048, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008257013745605946, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2284536361694336, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21575309336185455, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21186193823814392, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19363713264465332, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10717962682247162, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10274039953947067, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11850383877754211, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10975226014852524, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10853979736566544, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09736406058073044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09280604869127274, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06021253764629364, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05236635357141495, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05120494216680527, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05092794448137283, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030030585825443268, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026048997417092323, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02597290463745594, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024023041129112244, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02384766936302185, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01554432325065136, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015258131548762321, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015160353854298592, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009540078230202198, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.38.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.22686216235160828, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.20342978835105896, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.19288747012615204, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.17259696125984192, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10398205369710922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.0941896066069603, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12467370182275772, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11440173536539078, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10737787187099457, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09027117490768433, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08563464879989624, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06337578594684601, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.054861780256032944, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05013851821422577, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04897605627775192, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032069142907857895, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026394126936793327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02593723125755787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023719465360045433, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.022959094494581223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017730820924043655, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017342334613204002, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016296792775392532, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01215151697397232, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10373889654874802, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0975443571805954, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09502860903739929, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08661136031150818, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.048432815819978714, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04596887156367302, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05517952889204025, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05085945874452591, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049096062779426575, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.043867968022823334, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04192338511347771, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027987170964479446, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02429593726992607, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023205911740660667, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02294018492102623, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014027753844857216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011921936646103859, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011808468960225582, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010994653217494488, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010828856378793716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007362571079283953, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0072557078674435616, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007001234218478203, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00472303805872798, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.08922125399112701, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08390890061855316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08154312521219254, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07431140542030334, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04164592921733856, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.03945888578891754, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04753204435110092, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04394485801458359, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04222921282052994, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.037719763815402985, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03601779416203499, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02410109154880047, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.020968133583664894, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.019928373396396637, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.019682591781020164, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0120440274477005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010204830206930637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010094908997416496, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009403917007148266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009247897192835808, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006268088240176439, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006183839403092861, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.005934524349868298, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.003955310210585594, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23582851886749268, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22217188775539398, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.21762026846408844, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.19862094521522522, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11059126257896423, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10562244057655334, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12365367263555527, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11396351456642151, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11201156675815582, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1002177894115448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09556646645069122, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06280523538589478, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05439075455069542, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05281681939959526, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0524386428296566, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031317997723817825, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026833605021238327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026707550510764122, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02467246539890766, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024440433830022812, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016121622174978256, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015709910541772842, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015523768961429596, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009684622287750244, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.14396539330482483, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12120230495929718, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11492419242858887, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09451282024383545, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06536613404750824, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.057609256356954575, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0764099508523941, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06944270431995392, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06769669055938721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05159755423665047, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0463656447827816, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03917344659566879, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.034322939813137054, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.032397519797086716, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03195923566818237, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.020171234384179115, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.018348736688494682, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01823221519589424, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.015950867906212807, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015664976090192795, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011877982877194881, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013350183144211769, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.011348580941557884, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011028535664081573, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.18382303416728973, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1734689623117447, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17028507590293884, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.15554684400558472, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0864076092839241, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08277495205402374, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09577540308237076, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.08856235444545746, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08751761168241501, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07842974364757538, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07485705614089966, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04883526638150215, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.042389869689941406, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.041414037346839905, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04118337854743004, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0244143083691597, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02137376181781292, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.021308787167072296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019760463386774063, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019617188721895218, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.012925287708640099, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012987025082111359, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012605244293808937, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008761928416788578, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23524248600006104, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2220882773399353, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21812967956066132, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19928568601608276, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11048654466867447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10590237379074097, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12242989242076874, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11316987127065659, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11187777668237686, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10028714686632156, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09579276293516159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062392156571149826, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05406336858868599, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05284782499074936, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052560605108737946, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03119468502700329, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027024108916521072, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.026940491050481796, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02493942901492119, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024755369871854782, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016417820006608963, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01602748967707157, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016025280579924583, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010317117907106876, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.39.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23273178935050964, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21045902371406555, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2012600302696228, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18029259145259857, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10691209137439728, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.09801185876131058, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12519900500774384, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1155935600399971, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.10997667163610458, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09332174062728882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08836480230093002, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.063638836145401, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05524235963821411, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05131383612751961, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05035668984055519, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03187855705618858, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02659686468541622, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.0262367632240057, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02388772740960121, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023250212892889977, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01692243665456772, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016784939914941788, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015663599595427513, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01121512521058321, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10544389486312866, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09920980036258698, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09660179913043976, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08801987022161484, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04925281181931496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04675716906785965, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05584760010242462, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.051659177988767624, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04991878569126129, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04457443207502365, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04252158850431442, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028318937867879868, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024661434814333916, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023574434220790863, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02332121692597866, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014167673885822296, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01209252793341875, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011987483128905296, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011144614778459072, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01097754668444395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007410360034555197, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007332345470786095, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0070627592504024506, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004742398392409086, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09011667221784592, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08479295670986176, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08239768445491791, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07504860311746597, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04209495335817337, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.039862632751464844, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04820052906870842, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04449812322854996, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04266699030995369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03812992945313454, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03645190969109535, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024436848238110542, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021253317594528198, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020155346021056175, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.01988922245800495, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01222172100096941, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010321659967303276, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01020487304776907, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.00951301958411932, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009346659295260906, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0063753146678209305, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006268403027206659, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006031589582562447, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004008272662758827, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24119111895561218, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22730880975723267, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2227260023355484, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20310679078102112, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11313317716121674, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10806846618652344, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12636464834213257, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11656440049409866, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11461508274078369, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10244011878967285, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09770216792821884, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06419901549816132, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0556025505065918, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05403600633144379, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.053657032549381256, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032020892947912216, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027422742918133736, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027298666536808014, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02521209791302681, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.024970732629299164, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016444308683276176, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016024881973862648, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01584089733660221, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009844036772847176, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20297269523143768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.172629714012146, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16094112396240234, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13527584075927734, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09334225952625275, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0812707394361496, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11168518662452698, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1028718501329422, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09742281585931778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07322683930397034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06872133165597916, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.057639364153146744, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.049617040902376175, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04525894299149513, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04417354613542557, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02892925590276718, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023943275213241577, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02358427457511425, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02003682777285576, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01931167021393776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01567930169403553, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015922151505947113, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014308367855846882, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011350746266543865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19023224711418152, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.17951059341430664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1762392818927765, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16101166605949402, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08944033831357956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08566031605005264, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09914106875658035, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09163075685501099, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09055076539516449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08112692832946777, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0774097666144371, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.050429411232471466, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04383784905076027, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.042834680527448654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04259806126356125, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025221357122063637, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0220775343477726, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022010255604982376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02040169946849346, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02025805599987507, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013291303999722004, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01337712723761797, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01296937558799982, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.008962086401879787, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23750224709510803, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22422772645950317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.220107302069664, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20108731091022491, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11154522001743317, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10684610903263092, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1234038695693016, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11424405872821808, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11295332759618759, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10122983902692795, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09651049226522446, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06272304058074951, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.054552000015974045, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05332580953836441, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053029000759124756, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.031327635049819946, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.027181899175047874, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027103571221232414, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02506355568766594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02487754262983799, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016303090378642082, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01600060611963272, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01590205542743206, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010123344138264656, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.40.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23732449114322662, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21467705070972443, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20530597865581512, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18352513015270233, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.10903865098953247, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10002908110618591, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.12789522111415863, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1178981214761734, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11214171350002289, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09511161595582962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.08989424258470535, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06480930745601654, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056246597319841385, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.052283693104982376, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05131273344159126, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.032416265457868576, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026988431811332703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.026625491678714752, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024198485538363457, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.023554567247629166, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01707511767745018, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.016886023804545403, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.015781648457050323, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011085374280810356, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10415738075971603, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09799106419086456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09534461796283722, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08692625164985657, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04868489131331444, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04617059975862503, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05552688613533974, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0511738583445549, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049349959939718246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04406333342194557, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.042159199714660645, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028172746300697327, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02444128878414631, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023306062445044518, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02302953600883484, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014089017175137997, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011944286525249481, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011825098656117916, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011000288650393486, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010832344181835651, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007350356783717871, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007230973802506924, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0069839246571063995, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004645923618227243, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09074309468269348, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0853613093495369, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08295023441314697, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07560553401708603, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04237887263298035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.040102846920490265, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.048757411539554596, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04484511539340019, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04296035319566727, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.03837551921606064, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03681960329413414, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.024694083258509636, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021402327343821526, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020300475880503654, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020034059882164, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.012348937802016735, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010406196117401123, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010290199890732765, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009591013193130493, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009421366266906261, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006450914777815342, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006326665636152029, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006105742882937193, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004055866040289402, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24130865931510925, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.22725245356559753, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22273992002010345, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.203131303191185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11332039535045624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1081930473446846, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1266716718673706, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11675485968589783, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11480505764484406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10258124768733978, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09789872169494629, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06441750377416611, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05575272813439369, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05416744574904442, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05379769951105118, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032132383435964584, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02758697234094143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02746870554983616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025374481454491615, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025135088711977005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016596456989645958, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016279947012662888, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015981007367372513, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01021571084856987, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20147573947906494, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.16737401485443115, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1552107185125351, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1349499672651291, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09145407378673553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07727430760860443, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11133740097284317, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10145945101976395, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.097027488052845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07289988547563553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06858456879854202, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05752178281545639, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04956207796931267, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04507919400930405, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04396212846040726, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029143614694476128, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02495376393198967, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02468019723892212, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02157469652593136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020878225564956665, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016557589173316956, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017857743427157402, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01522496622055769, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.014097294770181179, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.19500240683555603, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18401950597763062, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18061861395835876, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16493824124336243, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0916791707277298, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08781064301729202, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10150189697742462, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09398363530635834, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09286389499902725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08319491147994995, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07925216108560562, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0517132431268692, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04495318606495857, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.043918438255786896, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04367419332265854, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.025823216885328293, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022617891430854797, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022552283480763435, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020897094160318375, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02074475958943367, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013571145944297314, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01367855817079544, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013235668651759624, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00913494173437357, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23951590061187744, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2261299192905426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22196964919567108, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2028045505285263, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11249416321516037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1077553853392601, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12448541820049286, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11524093896150589, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11390182375907898, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1021016538143158, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09736096858978271, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06324372440576553, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0550166554749012, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05377105623483658, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.053472910076379776, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03158337250351906, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0273676086217165, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.027284279465675354, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025212887674570084, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025032293051481247, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016404004767537117, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01604516990482807, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01599135808646679, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010051007382571697, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.41.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.23946520686149597, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21698564291000366, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20768746733665466, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1855253428220749, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11020542681217194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10122167319059372, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1289767175912857, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.11906836181879044, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11327800899744034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09625841677188873, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09100273996591568, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06563545763492584, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.056988783180713654, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05296570062637329, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.051969315856695175, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03299444913864136, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027533121407032013, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027159197255969048, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024762708693742752, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.0241127647459507, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017559992149472237, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017487093806266785, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016239751130342484, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011829645372927189, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11367129534482956, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10691677778959274, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1042398065328598, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09493497014045715, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05317366123199463, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05050383135676384, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06006497144699097, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.055606767535209656, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05388271063566208, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04808572307229042, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04584040492773056, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030428344383835793, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026537258177995682, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02543351985514164, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025171125307679176, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015203945338726044, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012996847741305828, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012887776829302311, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011957932263612747, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011787822470068932, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007891819812357426, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007791816722601652, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007533766329288483, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004938524216413498, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09773753583431244, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09195225685834885, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08948776125907898, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08153726905584335, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.045734137296676636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.043403513729572296, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05215837433934212, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04808147996664047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.046358950436115265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04140503332018852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03963545337319374, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026479775086045265, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022947002202272415, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.021903492510318756, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021654659882187843, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013243039138615131, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011205301620066166, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011103624477982521, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01032235100865364, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010166192427277565, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006900064181536436, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0067615513689816, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006566427648067474, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004310491029173136, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24443717300891876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2302483767271042, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22567510604858398, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20565874874591827, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11472178250551224, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10961636900901794, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12800800800323486, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11809006333351135, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11624636501073837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10388164222240448, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09901335835456848, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06505481898784637, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05637700855731964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054818183183670044, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05445390194654465, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032444048672914505, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027859849855303764, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027736559510231018, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0256044901907444, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025369610637426376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01671236753463745, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016313260421156883, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016125719994306564, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010081817395985126, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2012273222208023, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17937998473644257, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17158856987953186, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14743533730506897, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09293732792139053, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08469289541244507, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10820291936397552, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09975689649581909, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09623346477746964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07774221897125244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07179733365774155, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0555320680141449, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.047944411635398865, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04473872110247612, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04395262897014618, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02779749594628811, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.023242877796292305, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023011961951851845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02020452916622162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.019683493301272392, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014766361564397812, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.014716839417815208, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013714143075048923, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.009961122646927834, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20170050859451294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19026844203472137, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18675197660923004, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1705566644668579, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09487570822238922, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09085211157798767, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10520900785923004, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09726770222187042, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09610191732645035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08602574467658997, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08200516551733017, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05356806889176369, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04652223736047745, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04543787240982056, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04519755393266678, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.026778755709528923, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.023379843682050705, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023305222392082214, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021574510261416435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021418295800685883, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014115885831415653, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014089696109294891, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013768237084150314, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009357125498354435, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24202468991279602, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.22830653190612793, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22414882481098175, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20469801127910614, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11367341876029968, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10885971784591675, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12574689090251923, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11645755171775818, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1151222214102745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10309155285358429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09822829812765121, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06387288123369217, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.055587008595466614, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05432290956377983, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.054017189890146255, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03187701851129532, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02761782333254814, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02753157913684845, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025425758212804794, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02523357793688774, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016485225409269333, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016152335330843925, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016070524230599403, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010046901181340218, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.42.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2438874989748001, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22128000855445862, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2120639979839325, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18926496803760529, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11229579895734787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10339130461215973, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13095895946025848, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12083666026592255, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11533994227647781, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09802821278572083, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09239981323480606, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06636443734169006, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05765704810619354, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.053788088262081146, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.052846960723400116, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03322267159819603, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02765151858329773, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02729889564216137, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02476736716926098, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024136913940310478, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01747426576912403, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017092915251851082, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01621263660490513, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01101289689540863, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11326922476291656, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1064881831407547, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10363540798425674, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09433455765247345, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05299341306090355, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0502501018345356, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06020107492804527, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05568642169237137, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.053730838000774384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04790282994508743, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.045677147805690765, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030564425513148308, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026620497927069664, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025394471362233162, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025096189230680466, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015303784050047398, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01303908322006464, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012914297170937061, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012000678107142448, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011815723963081837, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00802248902618885, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007939026691019535, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007627067621797323, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005153188016265631, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.0970827117562294, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09125363826751709, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0887196809053421, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08078192919492722, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04542088136076927, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04300951212644577, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0519903190433979, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.047941386699676514, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04605088755488396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04108631983399391, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03927871584892273, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.026362359523773193, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.022884735837578773, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02175814099609852, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.021479060873389244, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013193024322390556, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011143552139401436, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011025803163647652, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01025332324206829, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010084646753966808, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0068779801949858665, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006756449118256569, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0065199933014810085, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004314787220209837, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24424555897712708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23000019788742065, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22523415088653564, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20514851808547974, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11465241760015488, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10937374085187912, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1281864494085312, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11816123127937317, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11618426442146301, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10358503460884094, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09873088449239731, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06512877345085144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056364599615335464, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0547596737742424, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05437236279249191, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03247138857841492, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027786267921328545, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0276624895632267, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025502776727080345, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02525634504854679, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01668158546090126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01623227447271347, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01605391502380371, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0099431611597538, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.17717306315898895, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.14189961552619934, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12908372282981873, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10854964703321457, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07922258228063583, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06585203111171722, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.09828954190015793, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0897693857550621, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.08408419042825699, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.06062144413590431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.056597549468278885, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.049593813717365265, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.044110119342803955, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03938988223671913, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0381733663380146, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02536207064986229, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02231242135167122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.021968908607959747, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019053179770708084, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01831642910838127, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014336363412439823, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.0165431946516037, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013001805171370506, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01337883248925209, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2070893496274948, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1952987015247345, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19164492189884186, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1748693585395813, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09746662527322769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09328299760818481, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10807973146438599, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09991253912448883, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09873329848051071, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08830434828996658, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08411893248558044, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.054999444633722305, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04783788323402405, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.046715039759874344, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046455543488264084, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027560604736208916, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02408282645046711, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02401048131287098, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022227125242352486, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02206272818148136, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014604918658733368, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01458930503576994, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014244628138840199, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009780933149158955, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24513833224773407, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2311941385269165, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22691519558429718, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20714780688285828, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11524808406829834, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1103108748793602, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12785281240940094, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11808209121227264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11671287566423416, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10442173480987549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09950917959213257, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06495057046413422, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05643835291266441, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05513658747076988, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.054832495748996735, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03253842890262604, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028155766427516937, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028065498918294907, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025929711759090424, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025733206421136856, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017134852707386017, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016642196103930473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01672186329960823, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010619823820888996, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.43.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2420983612537384, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22003066539764404, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21059650182724, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18795624375343323, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11171300709247589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10275571793317795, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13071757555007935, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1207847073674202, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11464878916740417, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09767886996269226, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09213559329509735, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06638089567422867, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.057800427079200745, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.0536712184548378, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.052671387791633606, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03326880559325218, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02785862237215042, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027472784742712975, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025063537061214447, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02440444566309452, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017535267397761345, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01763884536921978, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01618036814033985, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011852328665554523, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11784220486879349, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11080551147460938, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10802524536848068, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09839511662721634, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.055224109441041946, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0524522140622139, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.062383897602558136, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.057692382484674454, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05597672238945961, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04991607740521431, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04753318428993225, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03162146359682083, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02755013480782509, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026409853249788284, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026141703128814697, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01578598842024803, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013470751233398914, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013357982039451599, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012377708218991756, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012205677106976509, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0081753795966506, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008041833527386189, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007808481343090534, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005055482964962721, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10352756828069687, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.0973578542470932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09479787945747375, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08638399094343185, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04851451516151428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04602889344096184, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.055178526788949966, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05090972036123276, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049174390733242035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0438607819378376, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0419197678565979, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027987902984023094, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024292582646012306, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023219138383865356, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022962108254432678, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013995756395161152, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011865725740790367, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011756431311368942, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010913161560893059, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0107480613514781, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007277631666511297, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007118735928088427, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006940879859030247, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004504472017288208, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24929296970367432, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23470743000507355, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23003265261650085, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20960299670696259, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11717241257429123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11187773197889328, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13074687123298645, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12067683041095734, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11871826648712158, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10593555867671967, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10092808306217194, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06642960757017136, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05758295953273773, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0559808649122715, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.055597398430109024, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03313218802213669, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028388313949108124, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028260093182325363, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02604859322309494, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025808559730648994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017019784078001976, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016544776037335396, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016408460214734077, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01011432334780693, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20958828926086426, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17559246718883514, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.16417565941810608, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14336130023002625, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09628844261169434, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08235060423612595, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1132926270365715, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1040225401520729, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1004851832985878, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07547076791524887, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0720265582203865, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.058347780257463455, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05020654574036598, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04666821286082268, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.045794252306222916, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02936612442135811, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024745769798755646, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02451520599424839, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02072218991816044, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020134644582867622, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01617533713579178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01635598950088024, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015078687109053135, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01184835098683834, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2117951661348343, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19971849024295807, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19591008126735687, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17878715693950653, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09974836558103561, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09541895240545273, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11058762669563293, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10227934271097183, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10106007754802704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09035111963748932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08611292392015457, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05635186284780502, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04894143342971802, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04779248312115669, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.047521237283945084, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028204137459397316, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024627013131976128, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024548647925257683, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022713588550686836, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022547991946339607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014903735369443893, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014907442033290863, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014535744674503803, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009979183785617352, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2479177862405777, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23370181024074554, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22940292954444885, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20930327475070953, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11656908690929413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11153445392847061, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12904472649097443, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11948339641094208, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11809509247541428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10557415336370468, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1004888266324997, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06558448821306229, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05705476552248001, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05572578310966492, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05541262775659561, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03274117037653923, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028357483446598053, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028270136564970016, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026081742718815804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025882935151457787, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016979563981294632, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01662188023328781, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016541263088583946, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010386951267719269, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.44.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2510305941104889, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22793568670749664, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21847525238990784, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19465042650699615, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11581797897815704, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10662978887557983, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1353815793991089, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12469476461410522, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11883707344532013, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10106752067804337, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09516433626413345, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06884591281414032, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059557393193244934, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05553874373435974, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0545680969953537, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03464393690228462, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02865443378686905, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028280388563871384, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025689665228128433, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02503414824604988, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018551155924797058, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017863519489765167, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017240824177861214, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01170832198113203, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12308064103126526, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11579275876283646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1129181757569313, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1027025654911995, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0577169694006443, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05482545495033264, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06537561118602753, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06031186133623123, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058490585535764694, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05214560776948929, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04973188042640686, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03318142518401146, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02880185656249523, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027628393843770027, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027350937947630882, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016599223017692566, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014141246676445007, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014027392491698265, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012998361140489578, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012819508090615273, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008673573844134808, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008499864488840103, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008295894600450993, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005425043869763613, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10703805088996887, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10063005983829498, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09798246622085571, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08921870589256287, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0501859188079834, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.047603994607925415, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0572822205722332, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.052695710211992264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.050870053470134735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04537411779165268, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043388430029153824, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029088865965604782, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025169704109430313, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02403934858739376, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023769676685333252, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01454758457839489, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01230141893029213, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01218870934098959, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011313382536172867, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011140497401356697, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007601742632687092, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007418276276439428, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00724104093387723, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004727070219814777, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25119510293006897, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23646627366542816, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23170270025730133, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21090836822986603, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1180727556347847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1126842275261879, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13176819682121277, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12158162891864777, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1196659654378891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10666754096746445, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.1015949472784996, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06696298718452454, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05801943689584732, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05643431469798088, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05602993816137314, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03339638561010361, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02864839881658554, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028523679822683334, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026271814480423927, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0260310061275959, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01718023046851158, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01673807203769684, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016571933403611183, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010297798551619053, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2207934558391571, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18747475743293762, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17542685568332672, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1502215415239334, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10079846531152725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08791444450616837, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12107843160629272, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11103139817714691, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1060945987701416, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08131001889705658, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07554606348276138, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06250794231891632, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05363822728395462, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0489264614880085, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04776211455464363, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031310614198446274, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0259813591837883, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025662047788500786, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022221365943551064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021447673439979553, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017033260315656662, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01738154888153076, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015536773949861526, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.012524858117103577, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20868001878261566, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19664038717746735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19286486506462097, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.175899937748909, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09828905761241913, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09398028999567032, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10898279398679733, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10083874315023422, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09959819912910461, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08893924951553345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08472428470849991, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.055572960525751114, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.048277806490659714, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04712400957942009, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046852294355630875, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02778077870607376, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024276765063405037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02419644221663475, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02237795479595661, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022203443571925163, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01465629879385233, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014704381115734577, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01427513174712658, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009828262962400913, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2487505078315735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23442482948303223, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22997891902923584, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2096710056066513, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11707241088151932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11194682866334915, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12981347739696503, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12003657221794128, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11860397458076477, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10590256005525589, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1008593887090683, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0661211609840393, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05737138167023659, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.056009624153375626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05568868666887283, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.033009640872478485, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028586968779563904, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028504177927970886, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02629953622817993, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02609478496015072, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017249329015612602, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01690545491874218, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01680007204413414, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01078440248966217, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.45.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2523933947086334, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22941865026950836, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21973395347595215, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19558082520961761, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11656557768583298, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10733561962842941, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1358143538236618, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12571538984775543, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11957626789808273, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10171627253293991, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0956641361117363, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06902211159467697, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.060056135058403015, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.055963676422834396, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054971519857645035, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03455272316932678, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028969986364245415, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02858036383986473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025988196954131126, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02532394975423813, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01822766475379467, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018196655437350273, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01691916026175022, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01209460012614727, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1198849081993103, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11259382963180542, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10961788892745972, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09970442950725555, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05618835985660553, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05326783284544945, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06365589052438736, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05895347520709038, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05697983130812645, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05070998892188072, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048230577260255814, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03226280212402344, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02812930941581726, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02690393291413784, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02660459280014038, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01613236591219902, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013757163658738136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013634159229695797, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012631895020604134, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012442836537957191, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008397416211664677, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008272822946310043, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007998823188245296, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005255851894617081, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10489992797374725, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09854709357023239, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09585564583539963, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08720167726278305, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0491928867995739, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04659005254507065, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0560411773622036, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.051751405000686646, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04987523704767227, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04438892379403114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04232979938387871, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02847183123230934, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024707278236746788, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02355806902050972, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.023287532851099968, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014231373555958271, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012066933326423168, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011952509172260761, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011086758226156235, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010913546197116375, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00743660144507885, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007303367368876934, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007074760273098946, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004676421172916889, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2506200969219208, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2356581836938858, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23080775141716003, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21009181439876556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11781240999698639, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11235564202070236, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13159647583961487, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1213504746556282, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11941204965114594, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10632110387086868, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10113003104925156, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06683839857578278, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05791574344038963, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.056300804018974304, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0559082105755806, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03333666920661926, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028560496866703033, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0284341461956501, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026170877739787102, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02592437155544758, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017128048464655876, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016659464687108994, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016515104100108147, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010186279192566872, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20041221380233765, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1695253551006317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15461769700050354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.13286685943603516, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09258649498224258, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.07749529927968979, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11575503647327423, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10607432574033737, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09597422927618027, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07317578047513962, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.06905891001224518, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05956950783729553, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05106870085000992, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04483186826109886, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.043259114027023315, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02981235645711422, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02370443008840084, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023022491484880447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.019952192902565002, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.018875697627663612, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01595534197986126, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01604151912033558, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013884102925658226, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011172482743859291, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20853886008262634, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19636192917823792, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19259846210479736, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1755501925945282, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0982622504234314, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09391296654939651, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10911738872528076, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10080476105213165, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09958250820636749, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08882132917642593, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08459749817848206, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.055577635765075684, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04828229546546936, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.047117795795202255, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046845581382513046, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027859944850206375, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024318339303135872, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024238167330622673, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02240126207470894, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02223292365670204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014813834801316261, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014783077873289585, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014436368830502033, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009958896785974503, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.25036540627479553, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23570947349071503, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23126733303070068, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21072164177894592, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11778921633958817, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11262024939060211, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13052943348884583, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12084615230560303, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11938463151454926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10647890716791153, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10122081637382507, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06636565178632736, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05771631374955177, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05633590370416641, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05600859969854355, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03312591090798378, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02868257649242878, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028590833768248558, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026338905096054077, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026130441576242447, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017176689580082893, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016837462782859802, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016722217202186584, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010545821860432625, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.46.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25964346528053284, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23611192405223846, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22657309472560883, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.2015986293554306, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12000905722379684, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11074051260948181, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14035329222679138, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1288684904575348, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12309673428535461, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10473266243934631, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09851916134357452, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07099061459302902, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.061636459082365036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05764913931488991, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05666762590408325, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03568539395928383, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029855817556381226, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02949131838977337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02679462358355522, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026148978620767593, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018999522551894188, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018737180158495903, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01770033873617649, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01252584345638752, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12082860618829727, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11337707191705704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11046525835990906, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10034451633691788, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05669654533267021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0537165105342865, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06426122039556503, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05934830382466316, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05748794600367546, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0510675348341465, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048639360815286636, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03265126422047615, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028362104669213295, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0271576177328825, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026865538209676743, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016319716349244118, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013918657787144184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013804820366203785, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012766523286700249, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012584712356328964, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008514964021742344, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008413520641624928, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008121665567159653, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005424421280622482, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10308723896741867, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09674488008022308, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09404947608709335, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08543796092271805, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04835636541247368, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045723386108875275, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0553150549530983, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0509980283677578, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04905102029442787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.043581172823905945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.041581351310014725, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02809225209057331, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02436533197760582, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.023155618458986282, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022870490327477455, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014023114927113056, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011854573152959347, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011733422987163067, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010875243693590164, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010693756863474846, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007306032348424196, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007175556384027004, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00691634975373745, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00456832954660058, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2461870163679123, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23138883709907532, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22655095160007477, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20604699850082397, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11586298048496246, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11046069860458374, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12955242395401, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1194436103105545, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11748086661100388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10442305356264114, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09934426099061966, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06581628322601318, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0570201575756073, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.055385787039995193, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05499716475605965, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032847944647073746, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028121205046772957, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027988532558083534, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025745660066604614, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025491461157798767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01686929352581501, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016429927200078964, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01624881476163864, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010074300691485405, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20134902000427246, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1789644956588745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17216354608535767, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14619286358356476, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09444019943475723, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0859668180346489, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.10729280114173889, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.09856228530406952, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09676812589168549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07760170102119446, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07018587738275528, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05537942796945572, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04753876477479935, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.045646894723176956, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04519695043563843, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02780747041106224, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02398901805281639, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.023867813870310783, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020673159509897232, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.020374704152345657, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015186453238129616, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015332798473536968, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014606546610593796, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0109894759953022, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20137649774551392, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18963471055030823, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18596097826957703, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.16938018798828125, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09503962099552155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09079787135124207, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10552936047315598, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09750698506832123, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09631206095218658, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08583240956068039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0817495733499527, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.053791701793670654, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04667282849550247, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.045541875064373016, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04527194797992706, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.026864850893616676, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02349971979856491, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023426249623298645, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021631551906466484, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021461807191371918, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014225583523511887, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014287319034337997, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013854692690074444, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009600379504263401, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.24935245513916016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23479832708835602, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23030735552310944, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2097996473312378, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11751306056976318, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.1122770607471466, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13046663999557495, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12054377794265747, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11911100149154663, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10611801594495773, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10109510272741318, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06642204523086548, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0575963594019413, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05621982738375664, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05589418113231659, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03313196077942848, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028689345344901085, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028597665950655937, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02634710818529129, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026139933615922928, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017362680286169052, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01696026511490345, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016918033361434937, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010786903090775013, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.47.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24246971309185028, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21882009506225586, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20842257142066956, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.18616515398025513, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11157366633415222, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10188952088356018, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1322738230228424, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12173119932413101, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11481200158596039, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09707284718751907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09187115728855133, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06707467138767242, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05813050642609596, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05357610061764717, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05245743319392204, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03350627422332764, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.027790077030658722, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.027348356321454048, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.024904122576117516, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024160873144865036, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01773657090961933, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017654260620474815, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01626039855182171, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011765005066990852, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1167013943195343, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10951069742441177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10658662021160126, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09688995033502579, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05477199703454971, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05185695365071297, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.062305569648742676, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.057406604290008545, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05554138496518135, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04930465668439865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04708108305931091, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03166469931602478, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02744288183748722, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02623206190764904, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025940412655472755, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01583716832101345, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.0134374825283885, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013319294899702072, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012323097325861454, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012136498466134071, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008257320150732994, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008111598901450634, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007853873074054718, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005191871430724859, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10170752555131912, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09542392194271088, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09272491931915283, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08432069420814514, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04766929894685745, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.045064352452754974, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.054736457765102386, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05027700588107109, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04834919050335884, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.042932331562042236, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.041101738810539246, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027759917080402374, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024018583819270134, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02284691110253334, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.022563494741916656, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013871538452804089, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01170777902007103, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011586768552660942, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010738988406956196, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01055984664708376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007252051495015621, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007097276393324137, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0068742018193006516, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004536197520792484, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.247709259390831, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23268431425094604, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22786420583724976, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20722922682762146, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11662543565034866, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11112143099308014, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13045287132263184, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1202365830540657, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11827073246240616, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10508473217487335, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09996021538972855, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06637446582317352, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.057440150529146194, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05578896403312683, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.055395789444446564, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.033121075481176376, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02840879000723362, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028281480073928833, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02601141296327114, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025753330439329147, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017091430723667145, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016732359305024147, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0164665337651968, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010458176024258137, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21467633545398712, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18429459631443024, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17407305538654327, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14920425415039062, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09963349997997284, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08780840039253235, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11699724197387695, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10679219663143158, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10333456844091415, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07982715219259262, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07319238781929016, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.060457054525613785, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05212987959384918, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.048931024968624115, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04813634231686592, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03056148998439312, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02681880071759224, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02661474421620369, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023161275312304497, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02267366833984852, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017242571339011192, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018709365278482437, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016245996579527855, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01470579020678997, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20446249842643738, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1924986094236374, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18876691162586212, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1719157099723816, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09649322181940079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09219583123922348, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10706247389316559, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09902423620223999, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09777385741472244, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08714199811220169, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0829286277294159, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0546332448720932, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.047400087118148804, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04625049978494644, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04597596451640129, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027238231152296066, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02385365590453148, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023775698617100716, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021958954632282257, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021786890923976898, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014386339113116264, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014495435170829296, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014013915322721004, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009734473191201687, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2506999969482422, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23599529266357422, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23150214552879333, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21090392768383026, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11814310401678085, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11287988722324371, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13115346431732178, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12118154764175415, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11971680074930191, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1067054346203804, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1015939936041832, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0666770413517952, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05787963047623634, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05649178475141525, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05616303160786629, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03326401859521866, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028786489740014076, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028691690415143967, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026423240080475807, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026209864765405655, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017362963408231735, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.016945764422416687, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016904542222619057, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010668222792446613, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.48.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.24373942613601685, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22081471979618073, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21080686151981354, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1881960779428482, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11248871684074402, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1030024066567421, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1326594203710556, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12223242223262787, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11562652140855789, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09812935441732407, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0927334576845169, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06761568784713745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.05860274285078049, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05416719242930412, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.053069911897182465, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03398791700601578, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02832154557108879, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02789848856627941, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025495760142803192, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024786371737718582, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018273334950208664, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01826667971909046, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01681925542652607, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012590093538165092, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12518168985843658, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11747553944587708, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11451347917318344, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1040414571762085, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058765411376953125, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05570729076862335, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06633567810058594, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06133871152997017, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05959467589855194, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05293041467666626, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05026349797844887, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03366945683956146, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02929016947746277, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.028124570846557617, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027837300673127174, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016819661483168602, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014359280467033386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01425229199230671, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013155986554920673, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012981290929019451, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008722551167011261, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008588562719523907, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00833943858742714, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005430968478322029, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10841420292854309, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10175741463899612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09909523278474808, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09004876762628555, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05091642588376999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.048237938433885574, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.057923756539821625, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.053358957171440125, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0516347698867321, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045899320393800735, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.043692562729120255, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029426032677292824, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025486383587121964, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02437935583293438, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02411939948797226, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01469778548926115, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012465111911296844, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012356228195130825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011429976671934128, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011262798681855202, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007656204979866743, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007489588111639023, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.0072975754737854, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0047510904259979725, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24770095944404602, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2327413558959961, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22798022627830505, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20718051493167877, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11658235639333725, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11112748831510544, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13016784191131592, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12007796764373779, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1182524561882019, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10503850132226944, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0998973697423935, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06615723669528961, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05733674019575119, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05575224384665489, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05538138374686241, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03301232308149338, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028331071138381958, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02821270190179348, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025922581553459167, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025684775784611702, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.016993384808301926, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.016586029902100563, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016397135332226753, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01023026555776596, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2111223340034485, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19040443003177643, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18299779295921326, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15951018035411835, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09816832840442657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09034321457147598, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11355789750814438, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10478591173887253, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1012861579656601, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08395464718341827, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07710015028715134, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05832231789827347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05031079798936844, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.047244422137737274, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04650893062353134, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02918677031993866, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02451803907752037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024283723905682564, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02167060784995556, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021182136610150337, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015507711097598076, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015431956388056278, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01450737938284874, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01041348371654749, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20939365029335022, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.19703860580921173, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19317732751369476, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17590400576591492, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09885718673467636, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09438411146402359, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10979129374027252, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10147938877344131, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10019227862358093, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08920329809188843, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08488257229328156, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05599696934223175, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.048559803515672684, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04737604781985283, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04709811881184578, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.027941109612584114, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024411432445049286, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02433452568948269, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022456344217061996, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02227753773331642, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014792649075388908, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014800310134887695, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014407462440431118, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009897415526211262, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2527230381965637, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23781955242156982, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23323474824428558, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21239110827445984, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11910146474838257, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11377981305122375, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13209834694862366, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12217269092798233, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12071307748556137, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10748841613531113, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10224796831607819, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06718569248914719, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05835489556193352, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.056942734867334366, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05661138892173767, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03347267210483551, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0289821308106184, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.028891069814562798, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.026583340018987656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02636696957051754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01738927885890007, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01701970212161541, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016920777037739754, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.0106511814519763, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.49.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.25153130292892456, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.22819484770298004, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21856509149074554, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.19479966163635254, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11605662852525711, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1067609041929245, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13574367761611938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1251000314950943, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.1192115768790245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10120002925395966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.0953630805015564, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06878877431154251, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059675589203834534, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05560996010899544, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.054610349237918854, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.034389618784189224, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028590362519025803, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02821245789527893, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02558841183781624, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.024919435381889343, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018112223595380783, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.017708564177155495, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.016782473772764206, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.011414870619773865, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12411971390247345, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11637289822101593, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11333407461643219, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10280382633209229, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.058274950832128525, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0551966167986393, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06608264893293381, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06105441600084305, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05913342908024788, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052419357001781464, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04984048008918762, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03358589857816696, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0291814636439085, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02792135439813137, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.027620693668723106, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016803428530693054, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014308043755590916, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014183416962623596, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013103636913001537, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01290902029722929, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008776524104177952, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008645791560411453, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008356397971510887, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005552211310714483, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10708855092525482, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10042400658130646, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09766959398984909, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08859699964523315, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.050265613943338394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04754851758480072, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05737803876399994, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05287019535899162, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05101899057626724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04520653933286667, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04307456687092781, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.02913467213511467, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025250030681490898, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0240783654153347, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02379896678030491, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.014554898254573345, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012318984605371952, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012200539000332355, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011278649792075157, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011097894050180912, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007585859391838312, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00743232574313879, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007205109111964703, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0047237384133040905, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2515150010585785, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23614296317100525, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23116052150726318, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20991654694080353, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11832461506128311, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11272922903299332, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13237208127975464, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12203415483236313, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1200375109910965, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10648194700479507, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10124289989471436, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06729581952095032, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.058246172964572906, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05655964836478233, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05617102235555649, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.033568624407052994, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02871568687260151, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028586365282535553, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026246530935168266, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02598956972360611, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0172440055757761, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01677863672375679, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01660686358809471, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010281957685947418, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21294602751731873, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17142754793167114, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.15775862336158752, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1375790536403656, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0957333892583847, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0808200091123581, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11906369030475616, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10613550990819931, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.09958426654338837, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07588445395231247, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07013062387704849, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.059322770684957504, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0528351292014122, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04803271219134331, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04685363173484802, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.030565397813916206, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027975881472229958, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02759304828941822, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024608567357063293, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02392008900642395, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017673930153250694, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.02127392590045929, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01622813381254673, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01786593720316887, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2145794928073883, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2018194943666458, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1978660672903061, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1801060438156128, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10132648050785065, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09673881530761719, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11267443001270294, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1039939671754837, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10270138829946518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09138055145740509, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08689919859170914, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05742094665765762, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0498211607336998, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04859793186187744, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04831843823194504, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028727440163493156, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02510599046945572, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025020848959684372, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023094335570931435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022909605875611305, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015323520638048649, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015297072939574718, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014932073652744293, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010323228314518929, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2561432719230652, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.24094052612781525, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.2362591028213501, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21504312753677368, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12079603970050812, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11534402519464493, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13433754444122314, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12394160032272339, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12241577357053757, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10894856601953506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10363982617855072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06834576278924942, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05924811586737633, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05781714990735054, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05748646706342697, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03416883200407028, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.0295356884598732, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02943679876625538, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.027103547006845474, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026889465749263763, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01804366149008274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.017505845054984093, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017579419538378716, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01118707936257124, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.50.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2504608631134033, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2276686728000641, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.21792711317539215, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1942511796951294, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11578597873449326, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10651496797800064, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13545890152454376, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1251547634601593, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11887194216251373, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10119227319955826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09539660811424255, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06886535882949829, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.059935685247182846, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05564795807003975, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05460469424724579, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03446891903877258, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.028927844017744064, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02852199599146843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026021596044301987, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.025334710255265236, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.018168434500694275, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018372192978858948, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01675746776163578, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012404831126332283, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12906242907047272, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12107214331626892, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11807519942522049, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10723000019788742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06066533178091049, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05755603313446045, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06841708719730377, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06323935091495514, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.061554282903671265, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05458042770624161, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.051864899694919586, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.034741323441267014, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030216973274946213, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029040399938821793, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02876228652894497, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017349913716316223, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.014806482940912247, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014696341007947922, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013547269627451897, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.0133671248331666, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008973333984613419, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008810707367956638, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008581171743571758, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0055183302611112595, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11304738372564316, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10600841045379639, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10324424505233765, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0937967449426651, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05315172299742699, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05036754161119461, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06029144302010536, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.055591702461242676, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.053906988352537155, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04780105501413345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04553353786468506, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030609186738729477, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026539059355854988, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025437092408537865, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025176800787448883, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015281036496162415, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012988547794520855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012883300893008709, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011888298206031322, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01171796303242445, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007938826456665993, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007768283598124981, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00757983373478055, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004889708943665028, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25714391469955444, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24155257642269135, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23648031055927277, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21499298512935638, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12113314867019653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11539357900619507, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13527153432369232, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12478059530258179, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12286031246185303, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10904816538095474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10361716896295547, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0687437355518341, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05958086624741554, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05791783705353737, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.057524994015693665, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03428945690393448, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029390346258878708, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029267754405736923, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02686910331249237, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.026616811752319336, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017617132514715195, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017138205468654633, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016995443031191826, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.01047623809427023, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22636398673057556, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19709351658821106, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1875268518924713, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1611340492963791, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10546419769525528, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09444575756788254, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1215360090136528, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11162416636943817, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10873506963253021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08474991470575333, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07923919707536697, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06264553964138031, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05396365746855736, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.051150254905223846, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05046946555376053, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03152165189385414, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027164505794644356, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026978112757205963, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02315068244934082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022696517407894135, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017418917268514633, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01782737858593464, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01655561663210392, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013075792230665684, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21930094063282013, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2062128186225891, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.20216543972492218, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18393369019031525, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.1035800352692604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09886093437671661, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1151910200715065, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10633289813995361, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10500548779964447, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09337886422872543, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0888918936252594, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05882234498858452, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05091876536607742, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04967408627271652, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04936780408024788, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02938714064657688, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02562793903052807, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025548983365297318, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.0235697440803051, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023382989689707756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01563800498843193, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015599980019032955, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015237282030284405, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010499423369765282, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2591562867164612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.24371826648712158, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23903386294841766, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.2173205018043518, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12225130945444107, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11668696999549866, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1356998234987259, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12548771500587463, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1239447295665741, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.11021111160516739, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10472194105386734, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06906265765428543, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.059947285801172256, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05848332494497299, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05813297629356384, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.034420717507600784, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02978815883398056, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.029685383662581444, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02729012630879879, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.027066824957728386, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.017924923449754715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.017521105706691742, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017440276220440865, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01099358219653368, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.51.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2584872841835022, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23491951823234558, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22516806423664093, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.20056796073913574, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11958127468824387, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11016418039798737, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13986161351203918, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12873844802379608, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12264357507228851, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10440827161073685, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09831801056861877, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07113127410411835, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.061604030430316925, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05746615678071976, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.056457433849573135, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03577396646142006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02983792871236801, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02945355698466301, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0268086027354002, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026140868663787842, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0192184466868639, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018853316083550453, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017868317663669586, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012691671028733253, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1348811388015747, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12649106979370117, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12340404093265533, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1119849905371666, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06342515349388123, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.060177937150001526, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07169970124959946, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06608178466558456, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06434065103530884, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05703354626893997, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05423775687813759, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.036411114037036896, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03158818930387497, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030371813103556633, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030085496604442596, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018222182989120483, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015530048869550228, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015419097617268562, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014213228598237038, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014028681442141533, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009509733878076077, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00928758829832077, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009110642597079277, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005901038181036711, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11713448911905289, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10981999337673187, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10698787122964859, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09709145873785019, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05511733144521713, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05221713334321976, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0626116693019867, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05763346329331398, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05591992288827896, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04958546906709671, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04720441624522209, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031844478100538254, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027547383680939674, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026394061744213104, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.026120660826563835, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01590203307569027, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013492587953805923, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013385032303631306, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012353996746242046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012179087847471237, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008293362334370613, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008098623715341091, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007913187146186829, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005139518529176712, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2561023533344269, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24039633572101593, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23535142838954926, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2137463539838791, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12069115042686462, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11489638686180115, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1346573531627655, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12424372881650925, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12238230556249619, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10851031541824341, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10312307626008987, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06843387335538864, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.059339284896850586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05770880728960037, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.057322826236486435, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0341566763818264, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02930721454322338, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029194267466664314, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026783712208271027, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.026533735916018486, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017567893490195274, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017145704478025436, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01695871725678444, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010560502298176289, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.23710164427757263, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.20776841044425964, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.19777628779411316, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1732529252767563, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11074426025152206, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09852717816829681, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12864497303962708, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11797262728214264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11438708007335663, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.09179585427045822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08453419059515, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06645212322473526, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.056986045092344284, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.053742799907922745, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0529509037733078, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03329896554350853, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028438864275813103, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028209568932652473, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.024750392884016037, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02422557771205902, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.0182010680437088, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018608322367072105, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017186390236020088, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013498388230800629, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21595069766044617, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20293550193309784, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19892005622386932, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.18075592815876007, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10205136239528656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09736834466457367, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11339464038610458, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10479998588562012, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10347048938274384, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09189072996377945, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08739522844552994, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05790576711297035, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0501851812005043, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04892902076244354, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04863262549042702, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028908664360642433, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025230647996068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025146234780550003, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02316737361252308, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.0229793768376112, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015307880006730556, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015319201163947582, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01489967480301857, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010252393782138824, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.26017144322395325, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2445128858089447, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23970219492912292, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.217912495136261, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12284830212593079, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11718803644180298, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1365823894739151, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12612269818782806, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12452201545238495, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.11063693463802338, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10511685907840729, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06969954073429108, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.06029658392071724, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05880773067474365, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.058454133570194244, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03475980833172798, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.030082667246460915, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02998623251914978, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.027574336156249046, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02735186368227005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.018319763243198395, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.017892835661768913, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017836598679423332, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011513406410813332, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.52.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2625791132450104, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23897497355937958, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22917750477790833, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.20407217741012573, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12157460302114487, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1121283546090126, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1414225697517395, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1307787448167801, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12465246766805649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10618039220571518, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09984081238508224, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07185889035463333, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06251446157693863, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05837259069085121, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.0573672354221344, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03594561293721199, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03022581711411476, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029831402003765106, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027141757309436798, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026470515877008438, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0189636442810297, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01898156851530075, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017625076696276665, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012648504227399826, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13200753927230835, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12365521490573883, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12047893553972244, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10926476120948792, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06207156181335449, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.058809686452150345, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07015521824359894, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0648740604519844, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06301279366016388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.055759429931640625, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05291317403316498, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03561258688569069, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030988579615950584, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029723811894655228, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029423270374536514, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017792558297514915, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015181303955614567, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01506908517330885, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013877219520509243, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013686306774616241, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009229520335793495, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009079022333025932, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008811221458017826, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0057340506464242935, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11552145332098007, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1082010269165039, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10539278388023376, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09554088115692139, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0543224960565567, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.051416054368019104, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.061789970844984055, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05691731721162796, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05515338107943535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.048797253519296646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04644047096371651, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0313829742372036, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027210885658860207, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02602250501513481, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025736838579177856, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01567181386053562, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013313192874193192, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013199074193835258, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01217892486602068, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012000015005469322, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008166749961674213, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00801094900816679, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007775844074785709, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0050954511389136314, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2574569284915924, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24148885905742645, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23642253875732422, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21447843313217163, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12131974846124649, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11550518870353699, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13547372817993164, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1250022053718567, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12305617332458496, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10900966823101044, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10344994068145752, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06886348128318787, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05968528240919113, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05800796300172806, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05762598663568497, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.034348729997873306, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029451793059706688, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029325593262910843, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026886411011219025, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02663256973028183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01765872724354267, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017191162332892418, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017037075012922287, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010538428090512753, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2156284600496292, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18851657211780548, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17541268467903137, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15429668128490448, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10067080706357956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0878959521651268, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12352005392313004, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11321690678596497, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10352255403995514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08323357254266739, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07831718772649765, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06363353133201599, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054491229355335236, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.048714861273765564, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04727163910865784, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.031794194132089615, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025673702359199524, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.025020690634846687, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.022247621789574623, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021298136562108994, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017029158771038055, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017095040529966354, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015114194713532925, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011879818513989449, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21673300862312317, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20357492566108704, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1995059847831726, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1812307983636856, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10243142396211624, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09767059981822968, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11400837451219559, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10520520806312561, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10388283431529999, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09217806160449982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08754798769950867, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05815238505601883, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05041532218456268, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.049143169075250626, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.048848189413547516, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02908019721508026, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02538316138088703, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02529764175415039, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02330540306866169, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023118875920772552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015484758652746677, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015476290136575699, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015071198344230652, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010441437363624573, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.26204100251197815, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.24607297778129578, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.24120531976222992, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21915137767791748, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12365853786468506, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11791302263736725, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13719965517520905, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12699569761753082, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12539851665496826, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.1112387403845787, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10559380799531937, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06986688077449799, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.06066295504570007, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.059149932116270065, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05879371613264084, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.034808069467544556, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.030112965032458305, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.03001287579536438, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.027553889900445938, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.027328774333000183, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.018074091523885727, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01771296188235283, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017573829740285873, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011091592721641064, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.53.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.269879013299942, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2458481788635254, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2360769510269165, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.21010294556617737, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.1251411736011505, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11560803651809692, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1460694670677185, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13411033153533936, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12819308042526245, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10928310453891754, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.10266328603029251, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.0739624947309494, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06415771692991257, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.06011194735765457, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05912395194172859, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03709973394870758, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03116767108440399, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03079116903245449, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02799910679459572, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02733641117811203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01974879391491413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01958245411515236, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01841898262500763, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013133612461388111, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13485923409461975, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1262325644493103, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12298283725976944, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11144142597913742, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06343583762645721, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.0600452683866024, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07171910256147385, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06627482175827026, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06438463926315308, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.056880392134189606, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05393287166953087, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03641572222113609, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.031673092395067215, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030388956889510155, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.030075306072831154, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01819908246397972, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015522660687565804, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015404959209263325, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014170946553349495, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013978063128888607, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009447054006159306, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009297833777964115, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009021161124110222, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0058900038711726665, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11748166382312775, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11005754768848419, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10705902427434921, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09703762084245682, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05530918017029762, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05231085419654846, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06303718686103821, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05801253393292427, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05612994730472565, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04963076114654541, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.047216009348630905, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03203779458999634, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.02773357927799225, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0265004001557827, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02620641514658928, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01599988527595997, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013555847108364105, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013435202650725842, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012387799099087715, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012199964374303818, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008334586396813393, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008160550147294998, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007932829670608044, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005177331622689962, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.26220452785491943, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24573108553886414, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.24059103429317474, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21812066435813904, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12363439798355103, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11759459227323532, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13803328573703766, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12730641663074493, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12542392313480377, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.11092811822891235, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10519952327013016, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.07019548863172531, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.060820162296295166, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0591222420334816, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05871148779988289, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.035003870725631714, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.030015768483281136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02989117056131363, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.027372948825359344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.027110107243061066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01798768900334835, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01754191145300865, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017354324460029602, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010770799592137337, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2280280441045761, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.19664260745048523, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.18584510684013367, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.16169285774230957, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.1056220531463623, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09314440935850143, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12363846600055695, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.11339962482452393, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10954448580741882, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08619994670152664, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0786895602941513, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0634307712316513, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05446906015276909, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05093615502119064, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.050082117319107056, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03179500997066498, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.026580573990941048, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.026327643543481827, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02271609753370285, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.022129787132143974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017131632193922997, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01695762202143669, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.015997741371393204, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011657644994556904, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2158278375864029, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2026396244764328, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19849489629268646, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1802687793970108, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10207726061344147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09730121493339539, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11341055482625961, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10489505529403687, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1035381630063057, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09175299108028412, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08718211948871613, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05792197212576866, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05027894303202629, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.049004409462213516, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04870140925049782, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028919536620378494, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02533850632607937, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025250811129808426, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023255037143826485, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.023066729307174683, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015307609923183918, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015489249490201473, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014886836521327496, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010505895130336285, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.26102080941200256, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.24507743120193481, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.24017995595932007, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21809332072734833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12334252148866653, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11758217215538025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13692575693130493, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12665225565433502, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1250578761100769, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.11085165292024612, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10522866994142532, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0698012188076973, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.06055217236280441, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.059031397104263306, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05866916850209236, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03479495272040367, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.030141767114400864, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.030039796605706215, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.027575092390179634, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.027343016117811203, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.018162213265895844, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01785006932914257, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017662758007645607, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011365227401256561, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.54.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.26552972197532654, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.24149838089942932, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.23155969381332397, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.20590358972549438, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12304018437862396, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11342194676399231, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.143718421459198, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.1324411928653717, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12623707950115204, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10738347470760345, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.10089590400457382, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07320579886436462, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0633728951215744, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.059089094400405884, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05805356428027153, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03678208217024803, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030622337013483047, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.03021836280822754, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.02748367376625538, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02679053507745266, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.0197606198489666, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019258959218859673, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018364213407039642, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01284080371260643, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13427488505840302, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12567104399204254, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12234747409820557, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11077799648046494, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06318264454603195, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.059759899973869324, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07165159285068512, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06617814302444458, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06413743644952774, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05667010322213173, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05376260727643967, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0364081934094429, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.031645819544792175, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03027527779340744, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029959222301840782, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018200522288680077, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015483497641980648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015353386290371418, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014137916266918182, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013930663466453552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009470121003687382, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009307535365223885, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.009022112935781479, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0059095025062561035, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11638104915618896, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10891923308372498, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10590831935405731, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09595314413309097, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05474875494837761, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05173276364803314, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0622749961912632, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05746003985404968, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05555708333849907, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049076203256845474, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04664969816803932, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03164728730916977, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.0274642426520586, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.026226963847875595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025928417220711708, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01580929197371006, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013418215326964855, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01329739112406969, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0122581347823143, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012067534029483795, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008232125081121922, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008090384304523468, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007828373461961746, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005151985678821802, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.2582041025161743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2420261949300766, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23672766983509064, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.214624285697937, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12173514813184738, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11575767397880554, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13594073057174683, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12553276121616364, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1235104501247406, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10921576619148254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10351289808750153, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0691053718328476, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05996058136224747, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.058222465217113495, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05781032517552376, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03449070081114769, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.02954019047319889, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029411833733320236, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.026922082528471947, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.026658061891794205, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017714669927954674, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017226489260792732, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017076639458537102, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010496525093913078, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.22749075293540955, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2015155553817749, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1924700289964676, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1658928543329239, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10576871037483215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09577308595180511, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.12371606379747391, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1124386265873909, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10902712494134903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.087144635617733, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.08133909851312637, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06343239545822144, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.054393667727708817, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05135239660739899, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05062289535999298, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.032005127519369125, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.027424722909927368, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.027214791625738144, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.023839641362428665, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.023368336260318756, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017915746197104454, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.018226655200123787, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017030946910381317, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.013542454689741135, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2142748087644577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.20113417506217957, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19704067707061768, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17886866629123688, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10135199874639511, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09659905731678009, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11284882575273514, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10417179763317108, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10282207280397415, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09108161181211472, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0864923894405365, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05761628970503807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.049928225576877594, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04864957928657532, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04834861308336258, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028808582574129105, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02515898086130619, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.025075672194361687, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02307843789458275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022889088839292526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015328442677855492, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015381590463221073, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014912906102836132, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010427705012261868, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.25924891233444214, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.24327605962753296, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23836669325828552, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.21645788848400116, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.12252125144004822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11676248908042908, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.13637199997901917, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12589460611343384, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12428733706474304, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.11011813580989838, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.10449478775262833, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06956083327531815, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.060232654213905334, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05869928002357483, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.058343302458524704, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03474031016230583, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.03006461262702942, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.029958466067910194, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.027515707537531853, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.027283987030386925, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.018349381163716316, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.017938578501343727, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.01785062998533249, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011601188220083714, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.55.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2659372389316559, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.24157673120498657, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.23108406364917755, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.2053890824317932, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12320280075073242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.1132296696305275, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14488600194454193, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13333459198474884, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12646621465682983, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10739650577306747, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.10094183683395386, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07362949848175049, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06382807344198227, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.059220679104328156, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05807170271873474, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.037104226648807526, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.03070458583533764, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030252931639552116, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.0275432001799345, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026790019124746323, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.02000819519162178, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019411347806453705, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.018553946167230606, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012949508614838123, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13580597937107086, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1269778609275818, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.12349521368741989, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11185408383607864, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06388374418020248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06034335494041443, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07242731004953384, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.0670248493552208, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06484551727771759, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.057207394391298294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05423006787896156, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03676430135965347, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.032020650804042816, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0306190587580204, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03027036041021347, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.0183735229074955, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01566392183303833, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015526216477155685, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014290953986346722, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.014072042889893055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.00954871904104948, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009435870684683323, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.00909404642879963, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0059983874671161175, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11586456000804901, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10836292058229446, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10521220415830612, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0952347069978714, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05452357232570648, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.051414500921964645, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06231599301099777, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05748201534152031, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05534381419420242, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.048831451684236526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.046408988535404205, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.031685855239629745, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.027486180886626244, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.0261420588940382, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025820093229413033, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01583247072994709, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013414999470114708, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.01327695231884718, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012247652746737003, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012041637673974037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008276832289993763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008156338706612587, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007840893231332302, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005246621556580067, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.26460298895835876, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24790827929973602, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.24254541099071503, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2197539061307907, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12482042610645294, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11863894760608673, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.1395411640405655, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12871153652668, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12662117183208466, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1118462085723877, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10610642284154892, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.07092560082674026, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.06147407740354538, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05968907102942467, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05925462394952774, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03538095951080322, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.03029036708176136, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.03015895001590252, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.027590526267886162, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.027317199856042862, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.01816106028854847, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017688296735286713, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01750931702554226, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010823970660567284, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21633939445018768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18936525285243988, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.17979533970355988, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15579278767108917, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09900536388158798, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08930167555809021, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11689326912164688, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10774517059326172, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.10369282960891724, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08293648809194565, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07637224346399307, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05978373438119888, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.051621973514556885, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04760252311825752, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.046619612723588943, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.02984887920320034, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024705002084374428, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024443238973617554, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02150348573923111, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02084626629948616, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015763115137815475, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015710946172475815, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014453137293457985, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010496973991394043, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21429017186164856, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2010890394449234, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.19691036641597748, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1787194311618805, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10137837380170822, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09660355001688004, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11274061352014542, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.1042209044098854, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.1028301864862442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09108060598373413, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08644124120473862, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05760959908366203, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.049975112080574036, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.048685915768146515, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04838288575410843, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028789479285478592, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025207357481122017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0251210518181324, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.023126568645238876, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022935252636671066, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015302052721381187, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015455830842256546, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.0148776825517416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01052691601216793, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.25164860486984253, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.23611021041870117, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.23130930960178375, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20995239913463593, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11888426542282104, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.11327870935201645, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1322711706161499, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.12216886132955551, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.12056980282068253, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10680881887674332, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.1013556718826294, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06746455281972885, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05843586474657059, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05693760886788368, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05659230053424835, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.033652860671281815, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02909941039979458, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.029004957526922226, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.02661389857530594, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.026388470083475113, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01765494793653488, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01728844828903675, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.017158037051558495, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011055486276745796, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.56.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.26643106341362, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.2415258288383484, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.2310502827167511, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.2051241546869278, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12322984635829926, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11324876546859741, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14448431134223938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13325351476669312, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12666533887386322, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.1072211042046547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.10073435306549072, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07346226274967194, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06368814408779144, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05917893350124359, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.058074504137039185, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03673349693417549, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.030605552718043327, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.030188649892807007, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.027392348274588585, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.026658618822693825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019385065883398056, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.01921863481402397, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017902003601193428, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.012690057046711445, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.1338440179824829, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12512676417827606, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1217026636004448, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.11013653874397278, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06296386569738388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.059435851871967316, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0714021772146225, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06608527898788452, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0639105886220932, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.056360069662332535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05340800806879997, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03625311702489853, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03158736601471901, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.030166789889335632, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02982848323881626, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.018124211579561234, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.015439588576555252, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.015300416387617588, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.014083296991884708, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013865994289517403, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009421507827937603, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009310569614171982, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008962509222328663, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00593162328004837, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11303731054067612, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10572820901870728, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10264142602682114, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09293346107006073, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.053161073476076126, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05013873055577278, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06075133755803108, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05604308098554611, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05396144092082977, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04762900248169899, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.0452301949262619, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.030861221253871918, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.026780331507325172, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.025491565465927124, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.025173425674438477, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.015429838560521603, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013078645803034306, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012945497408509254, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011945192702114582, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011744117364287376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008069589734077454, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00795003306120634, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007648975122720003, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00511529203504324, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.26147061586380005, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2449493110179901, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.23958496749401093, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.21701380610466003, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12338036298751831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.1172693520784378, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13789817690849304, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1272602528333664, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1251944899559021, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.11056242883205414, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10480944067239761, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.07014228403568268, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.06078891083598137, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05901383236050606, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05858685448765755, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03498547151684761, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029968194663524628, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029833946377038956, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02729853242635727, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.027023985981941223, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017979314550757408, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017522374168038368, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01732996664941311, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010742120444774628, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.21305014193058014, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.18981215357780457, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1821003258228302, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.15629969537258148, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.10009034723043442, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.09106115251779556, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11433331668376923, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10514122992753983, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1024414524435997, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.08261018246412277, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07559071481227875, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05868007242679596, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.050512392073869705, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.048185236752033234, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04763897508382797, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029303621500730515, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.025049244984984398, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.024879565462470055, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.021564992144703865, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.021187299862504005, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015608686953783035, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01568586938083172, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014861970208585262, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010747662745416164, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2129814624786377, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.199750155210495, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1956653743982315, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17753896117210388, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10072475671768188, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09593863040208817, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.11200007051229477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10359061509370804, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10220393538475037, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09044516831636429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.085758276283741, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05712028965353966, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.049641646444797516, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04834207519888878, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.04803692549467087, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02853340655565262, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024959206581115723, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.024873996153473854, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022872762754559517, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02268247678875923, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015069132670760155, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015212119556963444, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014643930830061436, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010244819335639477, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.2443346232175827, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2292134165763855, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.22451049089431763, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.20368020236492157, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.11545056104660034, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10996246337890625, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.12830667197704315, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11865536123514175, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11711437255144119, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.10365893691778183, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09827718883752823, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.06539595872163773, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05673718824982643, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05527026206254959, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.05491894483566284, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.03259991109371185, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.028190961107611656, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02809746190905571, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.025760864838957787, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.025535322725772858, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016983812674880028, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01666090078651905, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.016492314636707306, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01054564118385315, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.57.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2640811502933502, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23836809396743774, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22757041454315186, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.20194268226623535, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.12195111066102982, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.11163775622844696, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.14349299669265747, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.13226917386054993, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12563703954219818, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.10582054406404495, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09938334673643112, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07290861010551453, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0631619319319725, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.058509085327386856, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05736592411994934, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03645281493663788, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.0301735270768404, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.029751824215054512, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026927972212433815, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02616489864885807, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019191909581422806, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018872756510972977, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01766124740242958, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01229005679488182, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13065136969089508, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1219586580991745, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11836453527212143, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10694124549627304, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06136459484696388, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05781644210219383, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07003585249185562, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06480853259563446, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06232735514640808, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.0548643097281456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.05196588113903999, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03558476269245148, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.030981432646512985, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.029437851160764694, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.029060736298561096, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017806926742196083, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01508669275790453, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014933275058865547, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013754778541624546, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.013511596247553825, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009291141293942928, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.009164717048406601, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008796019479632378, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005873357877135277, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10738403350114822, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10031671077013016, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09713728725910187, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08776718378067017, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05044221878051758, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04741120710968971, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05795786529779434, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.053573254495859146, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.051244817674160004, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045119624584913254, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04278838634490967, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.029423216357827187, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025604940950870514, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.024188552051782608, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02384503185749054, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01471315324306488, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01242656446993351, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.012277026660740376, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011341369710862637, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.011123872362077236, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007713032886385918, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007611038628965616, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007267727982252836, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004912970587611198, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25847572088241577, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.24199321866035461, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.236606627702713, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.2140047699213028, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.12195150554180145, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11584431678056717, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13665105402469635, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12590213119983673, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12374166399240494, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.1091742292046547, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.10341218113899231, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06949654966592789, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.06017729267477989, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.058368757367134094, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05792788043618202, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.034689128398895264, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029687626287341118, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.029548663645982742, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0270356647670269, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02675655111670494, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017854349687695503, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017444510012865067, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.017170729115605354, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010810611769557, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.20951275527477264, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.17949870228767395, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1697712540626526, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.14463193714618683, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.09693356603384018, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.08529408276081085, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.11252745240926743, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.10340844094753265, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.1008232831954956, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.07686485350131989, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.07105625420808792, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.05786733701825142, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.04977353289723396, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.04684514179825783, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.04612711817026138, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.029011700302362442, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.024668656289577484, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.0245048888027668, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.020748034119606018, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02027279883623123, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.015655051916837692, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.015979526564478874, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.014715162105858326, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011382806114852428, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.20492324233055115, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1921546459197998, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.18818089365959167, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.17066900432109833, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.09695073217153549, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09227775037288666, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10794097185134888, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09971100836992264, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09833374619483948, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08700446039438248, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0825067088007927, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.0550943985581398, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04781161621212959, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04654503986239433, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.046239763498306274, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.0275588296353817, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.024068683385849, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.023984089493751526, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022054236382246017, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.021868208423256874, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.014616083353757858, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01473342813551426, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014208110049366951, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009987164288759232, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.23142701387405396, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.21695853769779205, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.21249505877494812, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.19275887310504913, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10934294015169144, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.10411811619997025, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1216636449098587, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.11242547631263733, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.11092577874660492, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09814237058162689, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.09306853264570236, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.062048107385635376, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.053777579218149185, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.05237732082605362, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.052045226097106934, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.030951526015996933, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.026780739426612854, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02668905258178711, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.024474307894706726, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.024259386584162712, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.016203707084059715, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015924904495477676, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.015735071152448654, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.010210424661636353, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.58.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.256293922662735, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.23074272274971008, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.22023537755012512, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.1950845867395401, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11841306835412979, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10827475786209106, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.1399969756603241, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12838797271251678, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.12205301225185394, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.1024438738822937, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09614980220794678, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.07122988253831863, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.06150977686047554, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05699378252029419, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05589812248945236, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03573349863290787, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.029745643958449364, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.02933736890554428, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.026586128398776054, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02585216984152794, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.019165504723787308, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.019057003781199455, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017685923725366592, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.01303151622414589, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12454329431056976, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.1161835566163063, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11261369287967682, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10162632912397385, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05850924551486969, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.055036209523677826, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0671004205942154, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06198444217443466, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0594380721449852, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05224469304084778, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04954761266708374, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03416145220398903, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029668213799595833, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02809785306453705, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02770804427564144, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.017104962840676308, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01448493730276823, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014324997551739216, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013215305283665657, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012975867837667465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.009007829241454601, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008937587961554527, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008511694148182869, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005888964980840683, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10434408485889435, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09736836701631546, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09414917230606079, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08495192229747772, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04898330196738243, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04593505710363388, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05666665360331535, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.052303995937108994, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.049752186983823776, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04374440759420395, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04148394986987114, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.028793981298804283, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.025024568662047386, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02351287193596363, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02314601093530655, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01441634725779295, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.012131169438362122, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011968493461608887, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.011070909909904003, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010839668102562428, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007600176148116589, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.007521278690546751, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.007118512410670519, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.00493789603933692, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.24722915887832642, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23105722665786743, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22572007775306702, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.204026997089386, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11660914868116379, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11061017960309982, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13086307048797607, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12067306786775589, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11837106943130493, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10416574031114578, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09862197935581207, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.0665673017501831, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05768832564353943, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.055820491164922714, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0553753525018692, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03323742002248764, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028411241248250008, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028268035501241684, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02583150938153267, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02554970793426037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017138753086328506, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01675224117934704, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016472620889544487, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010407310910522938, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13654516637325287, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.12024103105068207, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11523374915122986, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.0994567945599556, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.06357765942811966, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05757766589522362, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07291186600923538, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06648511439561844, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06512607634067535, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.052244365215301514, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.048149727284908295, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.037540238350629807, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.03273177891969681, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03142302855849266, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03110261633992195, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01910001039505005, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.017628496512770653, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.017558179795742035, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.01559942215681076, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.015418430790305138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.011029875837266445, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01259278692305088, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.010635016486048698, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010348116047680378, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1941729635000229, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.18201851844787598, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.17821384966373444, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1615990400314331, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.0918450728058815, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.08742325752973557, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.10223057866096497, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.09453324973583221, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.09323184937238693, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.08242309093475342, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.07814891636371613, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05226203054189682, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.04538523778319359, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04415179044008255, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.043861061334609985, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02611760050058365, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.02291679009795189, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.022829269990324974, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.021014556288719177, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02082911506295204, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013882923871278763, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.014143941923975945, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013471245765686035, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.00972982868552208, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.21533909440040588, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.2018384337425232, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1976083517074585, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1791822463274002, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.10179367661476135, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.09689731895923615, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.1133507490158081, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.10471571981906891, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.10329725593328476, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.09131459891796112, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.08649827539920807, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.05788196623325348, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.05015525221824646, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04881995543837547, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.048498354852199554, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.028895551338791847, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.025080498307943344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.0249861478805542, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.022939205169677734, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.022741112858057022, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.015236146748065948, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.015092522837221622, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.014799302443861961, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009912937879562378, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.59.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.2447686493396759, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.21906962990760803, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.20874744653701782, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.184755340218544, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.11297967284917831, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.10279398411512375, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.13381825387477875, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.12256229668855667, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.11674772948026657, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.09726939350366592, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.09129594266414642, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.06838811933994293, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.058944664895534515, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.05457647144794464, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.05351906642317772, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.03436770662665367, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.02883283607661724, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.028466997668147087, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.025785692036151886, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02509094588458538, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.01866721175611019, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018932431936264038, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.017276952043175697, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.013507349416613579, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.12264011055231094, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.11454512178897858, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.11094805598258972, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10018150508403778, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05761895328760147, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.054216012358665466, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.06638678163290024, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06114263832569122, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.058532342314720154, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.051513008773326874, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04899657890200615, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03377322480082512, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.029258709400892258, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.027677021920681, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02729947678744793, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01693589985370636, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.01427314430475235, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.014104570262134075, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.013031198643147945, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012783976271748543, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008935360237956047, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.008803006261587143, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008440028876066208, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005779353901743889, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.10070248693227768, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.09404336661100388, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.0908413752913475, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08199753612279892, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04726382717490196, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04432028904557228, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.05482485145330429, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05060742050409317, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.048013292253017426, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.04228256270289421, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04011531546711922, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.027868589386343956, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.024209484457969666, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02270294725894928, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.0223334189504385, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.013952961191534996, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.011711381375789642, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.011545449495315552, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.010700107552111149, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.010465826839208603, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.007374473847448826, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00727414432913065, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006898054387420416, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.0047685811296105385, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.25270509719848633, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.23639068007469177, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.2310134917497635, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.20889168977737427, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11925584822893143, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.11321816593408585, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13364924490451813, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.12316885590553284, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.12107576429843903, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10663136839866638, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.1009330078959465, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06805238127708435, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05887393280863762, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.05708006024360657, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.05665861815214157, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.033955685794353485, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.029061127454042435, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.02892683446407318, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.02643655240535736, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.02616114169359207, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017494402825832367, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017125891521573067, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.01682404987514019, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010666041634976864, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.15494047105312347, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.13381275534629822, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.1274513602256775, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.10692253708839417, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.07268141955137253, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.06513883918523788, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.08247870951890945, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.07529722899198532, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.0743606761097908, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.05668504536151886, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.051560889929533005, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.042649030685424805, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.036818042397499084, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03564590960741043, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03536955267190933, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.021695226430892944, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.019619325175881386, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.019558735191822052, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.016710134223103523, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.01653166674077511, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.012481753714382648, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.013575478456914425, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.012127562426030636, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.010859065689146519, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.16082662343978882, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1505149006843567, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.14733555912971497, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.13366754353046417, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07632133364677429, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07265148311853409, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.0849987044930458, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.07857941836118698, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.07744524627923965, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.0685543566942215, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0650845393538475, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.043894920498132706, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.0384226031601429, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03742153197526932, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03718552738428116, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.02212553843855858, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.020654598250985146, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.020588284358382225, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.019217122346162796, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.019085071980953217, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.01255519688129425, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.0143347242847085, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.012250502593815327, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.011572851799428463, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1726188063621521, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.16174279153347015, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.15830546617507935, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.1436077356338501, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.08197713643312454, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.07809457182884216, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.09151853621006012, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.084390789270401, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.08320388197898865, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.07367898523807526, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.0699828714132309, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04722575098276138, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.041271619498729706, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.04020436853170395, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.03995534032583237, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.023880228400230408, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.022192295640707016, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.02212016098201275, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.020657433196902275, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.02051151730120182, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.013649350963532925, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.01540042832493782, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.013333342969417572, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.01242887694388628, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.60.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.20903317630290985, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.18513572216033936, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.17579153180122375, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.15577900409698486, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.09621188044548035, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.08697978407144547, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.11638007313013077, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.10496624559164047, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.09963974356651306, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.08257237821817398, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.07794275134801865, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.05961628258228302, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.0511469766497612, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.04717200621962547, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.04618614539504051, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.030529862269759178, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.026023896411061287, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.025694845244288445, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.023528574034571648, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.02291901782155037, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.017663385719060898, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.018407367169857025, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.01647288165986538, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.014484160579741001, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.self_attn.q_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.11683621257543564, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10919132083654404, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.10563831776380539, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.09543435275554657, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.0548836849629879, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.05152008682489395, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.0638616681098938, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.05851195007562637, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.05571182072162628, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.049109410494565964, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.04686259478330612, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03250598534941673, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.028011230751872063, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.02634923718869686, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.02594720758497715, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.016293782740831375, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.013583303429186344, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.013403339311480522, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.012409430928528309, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.012156409211456776, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.008586246520280838, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.00840943492949009, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.008064026944339275, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.005510533694177866, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.self_attn.k_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.09063474088907242, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.08467312157154083, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.08165699243545532, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.07370966672897339, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.04252772778272629, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.039787545800209045, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.04973185434937477, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.04572422802448273, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.04318079352378845, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.038039591163396835, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.03617214784026146, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.025308512151241302, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.021914435550570488, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.020430950447916985, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.020075788721442223, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.01269535068422556, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.010600081644952297, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.010434115305542946, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.009698829613626003, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.009473629295825958, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.006767850369215012, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.006682945415377617, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.006305580958724022, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.004476826172322035, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.self_attn.v_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.242019921541214, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.2263769656419754, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.22100116312503815, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.1999950110912323, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.11425095051527023, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.10823677480220795, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.13008031249046326, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.1182904839515686, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.11597564816474915, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.10213831812143326, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.09728632122278214, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.06648403406143188, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.05688052251935005, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.054945215582847595, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.054441358894109726, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.03363572806119919, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.028266407549381256, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.028087522834539413, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.025802547112107277, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.025487013161182404, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.017627548426389694, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.017077699303627014, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.016901599243283272, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.011109873652458191, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.self_attn.o_proj", "numel": 26214400, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.17529296875, "total_bits": 57024000.0, "err": 0.13413631916046143, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 62266880.00000001, "err": 0.10122969001531601, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 68820480.0, "err": 0.09042178094387054, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.72529296875, "total_bits": 71441920.0, "err": 0.08297586441040039, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.22529296875, "total_bits": 84549120.0, "err": 0.05918046832084656, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.72529296875, "total_bits": 97656320.0, "err": 0.04667286574840546, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 79464320.0, "err": 0.07943028956651688, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 81927680.0, "err": 0.06569281965494156, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.17529296875, "total_bits": 83238400.0, "err": 0.06326556205749512, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.52529296875, "total_bits": 92413440.0, "err": 0.045596539974212646, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.662646484375, "total_bits": 96014080.0, "err": 0.044313471764326096, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 105678720.0, "err": 0.03997613489627838, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 108142080.0, "err": 0.034131329506635666, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.22529296875, "total_bits": 110763520.0, "err": 0.03125510364770889, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.32529296875, "total_bits": 113384960.00000001, "err": 0.03055107593536377, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 131893120.0, "err": 0.021909909322857857, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.22529296875, "total_bits": 136977920.0, "err": 0.020192265510559082, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.32529296875, "total_bits": 139599360.0, "err": 0.020072774961590767, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.52529296875, "total_bits": 144842240.0, "err": 0.0182229932397604, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.72529296875, "total_bits": 150085120.0, "err": 0.017862215638160706, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 158107520.0, "err": 0.014633014798164368, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 160570880.0, "err": 0.01684631034731865, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.2313232421875, "total_bits": 163350400.0, "err": 0.013908538967370987, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 212999679.99999997, "err": 0.015279815532267094, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.mlp.gate_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.1517898142337799, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.14205950498580933, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.13895872235298157, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12572741508483887, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.07163466513156891, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06805288046598434, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.08009928464889526, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0738406851887703, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0727003961801529, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06411640346050262, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.06075345352292061, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.04087367281317711, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03539900481700897, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.03440210595726967, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.034158073365688324, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020425917580723763, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.017828812822699547, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.017757263034582138, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016315534710884094, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016164349392056465, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.010821342468261719, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.010974681004881859, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.010489673353731632, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.007496010046452284, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.mlp.up_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1751085069444445, "total_bits": 153951744.0, "err": 0.14521941542625427, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.3751085069444446, "total_bits": 168107520.0, "err": 0.1359298676252365, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.6251085069444446, "total_bits": 185802240.0, "err": 0.1329224705696106, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7251085069444443, "total_bits": 192880128.0, "err": 0.12032217532396317, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.2251085069444443, "total_bits": 228269568.0, "err": 0.06898550689220428, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7251085069444443, "total_bits": 263659008.0, "err": 0.06555196642875671, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.031277126736111, "total_bits": 214550400.0, "err": 0.07728023827075958, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.1251085069444446, "total_bits": 221191680.0, "err": 0.0711071789264679, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1751085069444445, "total_bits": 224730624.0, "err": 0.0700111985206604, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.5251085069444446, "total_bits": 249503232.0, "err": 0.06178134307265282, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6625542534722224, "total_bits": 259231488.0, "err": 0.058675456792116165, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.031277126736111, "total_bits": 285329280.0, "err": 0.039870135486125946, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.125108506944445, "total_bits": 291970560.0, "err": 0.03456425666809082, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.225108506944444, "total_bits": 299048448.0, "err": 0.033607982099056244, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.325108506944445, "total_bits": 306126336.0, "err": 0.0333833247423172, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.031277126736111, "total_bits": 356108160.0, "err": 0.020110467448830605, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.225108506944444, "total_bits": 369827328.0, "err": 0.018204348161816597, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.325108506944445, "total_bits": 376905216.0, "err": 0.018143970519304276, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525108506944444, "total_bits": 391060992.0, "err": 0.016848908737301826, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.725108506944444, "total_bits": 405216768.0, "err": 0.016718925908207893, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.031277126736111, "total_bits": 426887040.0, "err": 0.011305813677608967, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.125108506944445, "total_bits": 433528320.0, "err": 0.012260695919394493, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.231277126736111, "total_bits": 441042816.0, "err": 0.011008653789758682, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.125108506944445, "total_bits": 575086080.0, "err": 0.009561694227159023, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] }, { "key": "model.layers.61.mlp.down_proj", "numel": 70778880, "options": [ { "desc": "0.05:3b_32g/0.95:2b_32g s4", "bpw": 2.1762188946759258, "total_bits": 154030336.0, "err": 0.10758814215660095, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.25:3b_32g/0.75:2b_32g s4", "bpw": 2.37529296875, "total_bits": 168120576.0, "err": 0.09478842467069626, "qparams": { "group_size": { "3": 32, "2": 32 }, "bits": [ 3, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.25:4b_32g/0.75:2b_32g s4", "bpw": 2.62529296875, "total_bits": 185815296.0, "err": 0.08862003684043884, "qparams": { "group_size": { "4": 32, "2": 32 }, "bits": [ 4, 2 ], "bits_prop": [ 0.25, 0.75 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.4:3b_32g/0.5:2b_32g s4", "bpw": 2.7294596354166667, "total_bits": 193188096.0, "err": 0.0783499926328659, "qparams": { "group_size": { "4": 32, "3": 32, "2": 32 }, "bits": [ 4, 3, 2 ], "bits_prop": [ 0.1, 0.4, 0.5 ], "scale_bits": 4 } }, { "desc": "0.1:4b_32g/0.9:3b_32g s4", "bpw": 3.227144820601852, "total_bits": 228413696.0, "err": 0.04963487759232521, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.2:6b_32g/0.8:3b_32g s4", "bpw": 3.7294596354166667, "total_bits": 263966976.0, "err": 0.044031187891960144, "qparams": { "group_size": { "6": 32, "3": 32 }, "bits": [ 6, 3 ], "bits_prop": [ 0.2, 0.8 ], "scale_bits": 4 } }, { "desc": "1.0:3b_128g s4", "bpw": 3.0313232421875, "total_bits": 214553664.0, "err": 0.06355269998311996, "qparams": { "group_size": { "3": 128 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:3b_32g s4", "bpw": 3.12529296875, "total_bits": 221204736.0, "err": 0.056178055703639984, "qparams": { "group_size": { "3": 32 }, "bits": [ 3 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.05:4b_32g/0.95:3b_32g s4", "bpw": 3.1762188946759258, "total_bits": 224809216.0, "err": 0.051613081246614456, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.05, 0.95 ], "scale_bits": 4 } }, { "desc": "0.4:4b_32g/0.6:3b_32g s4", "bpw": 3.525755931712963, "total_bits": 249549056.0, "err": 0.04254665970802307, "qparams": { "group_size": { "4": 32, "3": 32 }, "bits": [ 4, 3 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.6:4b_64g/0.4:3b_64g s4", "bpw": 3.6644983362268517, "total_bits": 259369088.0, "err": 0.040626659989356995, "qparams": { "group_size": { "4": 64, "3": 64 }, "bits": [ 4, 3 ], "bits_prop": [ 0.6, 0.4 ], "scale_bits": 4 } }, { "desc": "1.0:4b_128g s4", "bpw": 4.0313232421875, "total_bits": 285332544.0, "err": 0.03274378553032875, "qparams": { "group_size": { "4": 128 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:4b_32g s4", "bpw": 4.12529296875, "total_bits": 291983616.0, "err": 0.027787724509835243, "qparams": { "group_size": { "4": 32 }, "bits": [ 4 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:5b_32g/0.9:4b_32g s4", "bpw": 4.227144820601852, "total_bits": 299192576.0, "err": 0.024909326806664467, "qparams": { "group_size": { "5": 32, "4": 32 }, "bits": [ 5, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:4b_32g s4", "bpw": 4.3289966724537035, "total_bits": 306401536.0, "err": 0.024194132536649704, "qparams": { "group_size": { "6": 32, "4": 32 }, "bits": [ 6, 4 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:5b_128g s4", "bpw": 5.0313232421875, "total_bits": 356111424.0, "err": 0.016975000500679016, "qparams": { "group_size": { "5": 128 }, "bits": [ 5 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:6b_32g/0.9:5b_32g s4", "bpw": 5.227144820601852, "total_bits": 369971456.0, "err": 0.014547464437782764, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "0.05:8b_32g/0.05:6b_32g/0.9:5b_32g s4", "bpw": 5.3289966724537035, "total_bits": 377180416.0, "err": 0.01428919192403555, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.05, 0.05, 0.9 ], "scale_bits": 4 } }, { "desc": "0.4:6b_32g/0.6:5b_32g s4", "bpw": 5.525755931712963, "total_bits": 391106816.0, "err": 0.013353385962545872, "qparams": { "group_size": { "6": 32, "5": 32 }, "bits": [ 6, 5 ], "bits_prop": [ 0.4, 0.6 ], "scale_bits": 4 } }, { "desc": "0.1:8b_32g/0.3:6b_32g/0.6:5b_32g s4", "bpw": 5.731774450231481, "total_bits": 405688576.0, "err": 0.012943707406520844, "qparams": { "group_size": { "8": 32, "6": 32, "5": 32 }, "bits": [ 8, 6, 5 ], "bits_prop": [ 0.1, 0.3, 0.6 ], "scale_bits": 4 } }, { "desc": "1.0:6b_128g s4", "bpw": 6.0313232421875, "total_bits": 426890304.0, "err": 0.010135922580957413, "qparams": { "group_size": { "6": 128 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "1.0:6b_32g s4", "bpw": 6.12529296875, "total_bits": 433541376.0, "err": 0.011184264905750751, "qparams": { "group_size": { "6": 32 }, "bits": [ 6 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } }, { "desc": "0.1:8b_128g/0.9:6b_128g s4", "bpw": 6.235026945891204, "total_bits": 441308224.0, "err": 0.009200183674693108, "qparams": { "group_size": { "8": 128, "6": 128 }, "bits": [ 8, 6 ], "bits_prop": [ 0.1, 0.9 ], "scale_bits": 4 } }, { "desc": "1.0:8b_32g s4", "bpw": 8.12529296875, "total_bits": 575099136.0, "err": 0.009404009208083153, "qparams": { "group_size": { "8": 32 }, "bits": [ 8 ], "bits_prop": [ 1.0 ], "scale_bits": 4 } } ] } ], "last_module_idx": 126, "base_perplexity": 9.066884634940976 }