Files changed (1)
  1. README.md +390 -1
README.md CHANGED
@@ -8,4 +8,393 @@ tags:
 
 Merge of top 7B models and the SLERP of other 7B models
 
- > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
+ > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
+
+ ## Eval
+
+
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/5fd5e18a90b6dc4633f6d292/3a2An3rpaLMusQrtQ74Up.png)
+
+
+ ```json
+ {
+     "all": {
+         "acc": 0.6568351479800627,
+         "acc_stderr": 0.03199600851869088,
+         "acc_norm": 0.6554901222242155,
+         "acc_norm_stderr": 0.03267670432184765,
+         "mc1": 0.5104039167686658,
+         "mc1_stderr": 0.017499711430249268,
+         "mc2": 0.6556430108444109,
+         "mc2_stderr": 0.015519025079862213
+     },
+     "harness|arc:challenge|25": {
+         "acc": 0.6919795221843004,
+         "acc_stderr": 0.013491429517292038,
+         "acc_norm": 0.7218430034129693,
+         "acc_norm_stderr": 0.013094469919538812
+     },
+     "harness|hellaswag|10": {
+         "acc": 0.7202748456482773,
+         "acc_stderr": 0.0044794676194648,
+         "acc_norm": 0.8828918542123083,
+         "acc_norm_stderr": 0.003208919510309931
+     },
+     "harness|hendrycksTest-abstract_algebra|5": {
+         "acc": 0.33,
+         "acc_stderr": 0.047258156262526045,
+         "acc_norm": 0.33,
+         "acc_norm_stderr": 0.047258156262526045
+     },
+     "harness|hendrycksTest-anatomy|5": {
+         "acc": 0.6518518518518519,
+         "acc_stderr": 0.041153246103369526,
+         "acc_norm": 0.6518518518518519,
+         "acc_norm_stderr": 0.041153246103369526
+     },
+     "harness|hendrycksTest-astronomy|5": {
+         "acc": 0.7039473684210527,
+         "acc_stderr": 0.03715062154998904,
+         "acc_norm": 0.7039473684210527,
+         "acc_norm_stderr": 0.03715062154998904
+     },
+     "harness|hendrycksTest-business_ethics|5": {
+         "acc": 0.66,
+         "acc_stderr": 0.04760952285695238,
+         "acc_norm": 0.66,
+         "acc_norm_stderr": 0.04760952285695238
+     },
+     "harness|hendrycksTest-clinical_knowledge|5": {
+         "acc": 0.6981132075471698,
+         "acc_stderr": 0.02825420034443866,
+         "acc_norm": 0.6981132075471698,
+         "acc_norm_stderr": 0.02825420034443866
+     },
+     "harness|hendrycksTest-college_biology|5": {
+         "acc": 0.7708333333333334,
+         "acc_stderr": 0.03514697467862388,
+         "acc_norm": 0.7708333333333334,
+         "acc_norm_stderr": 0.03514697467862388
+     },
+     "harness|hendrycksTest-college_chemistry|5": {
+         "acc": 0.48,
+         "acc_stderr": 0.050211673156867795,
+         "acc_norm": 0.48,
+         "acc_norm_stderr": 0.050211673156867795
+     },
+     "harness|hendrycksTest-college_computer_science|5": {
+         "acc": 0.52,
+         "acc_stderr": 0.050211673156867795,
+         "acc_norm": 0.52,
+         "acc_norm_stderr": 0.050211673156867795
+     },
+     "harness|hendrycksTest-college_mathematics|5": {
+         "acc": 0.27,
+         "acc_stderr": 0.044619604333847394,
+         "acc_norm": 0.27,
+         "acc_norm_stderr": 0.044619604333847394
+     },
+     "harness|hendrycksTest-college_medicine|5": {
+         "acc": 0.6705202312138728,
+         "acc_stderr": 0.03583901754736412,
+         "acc_norm": 0.6705202312138728,
+         "acc_norm_stderr": 0.03583901754736412
+     },
+     "harness|hendrycksTest-college_physics|5": {
+         "acc": 0.4019607843137255,
+         "acc_stderr": 0.04878608714466996,
+         "acc_norm": 0.4019607843137255,
+         "acc_norm_stderr": 0.04878608714466996
+     },
+     "harness|hendrycksTest-computer_security|5": {
+         "acc": 0.75,
+         "acc_stderr": 0.04351941398892446,
+         "acc_norm": 0.75,
+         "acc_norm_stderr": 0.04351941398892446
+     },
+     "harness|hendrycksTest-conceptual_physics|5": {
+         "acc": 0.5914893617021276,
+         "acc_stderr": 0.032134180267015755,
+         "acc_norm": 0.5914893617021276,
+         "acc_norm_stderr": 0.032134180267015755
+     },
+     "harness|hendrycksTest-econometrics|5": {
+         "acc": 0.5087719298245614,
+         "acc_stderr": 0.04702880432049615,
+         "acc_norm": 0.5087719298245614,
+         "acc_norm_stderr": 0.04702880432049615
+     },
+     "harness|hendrycksTest-electrical_engineering|5": {
+         "acc": 0.5724137931034483,
+         "acc_stderr": 0.04122737111370332,
+         "acc_norm": 0.5724137931034483,
+         "acc_norm_stderr": 0.04122737111370332
+     },
+     "harness|hendrycksTest-elementary_mathematics|5": {
+         "acc": 0.42592592592592593,
+         "acc_stderr": 0.02546714904546955,
+         "acc_norm": 0.42592592592592593,
+         "acc_norm_stderr": 0.02546714904546955
+     },
+     "harness|hendrycksTest-formal_logic|5": {
+         "acc": 0.49206349206349204,
+         "acc_stderr": 0.044715725362943486,
+         "acc_norm": 0.49206349206349204,
+         "acc_norm_stderr": 0.044715725362943486
+     },
+     "harness|hendrycksTest-global_facts|5": {
+         "acc": 0.37,
+         "acc_stderr": 0.04852365870939099,
+         "acc_norm": 0.37,
+         "acc_norm_stderr": 0.04852365870939099
+     },
+     "harness|hendrycksTest-high_school_biology|5": {
+         "acc": 0.7903225806451613,
+         "acc_stderr": 0.023157879349083525,
+         "acc_norm": 0.7903225806451613,
+         "acc_norm_stderr": 0.023157879349083525
+     },
+     "harness|hendrycksTest-high_school_chemistry|5": {
+         "acc": 0.5073891625615764,
+         "acc_stderr": 0.035176035403610105,
+         "acc_norm": 0.5073891625615764,
+         "acc_norm_stderr": 0.035176035403610105
+     },
+     "harness|hendrycksTest-high_school_computer_science|5": {
+         "acc": 0.66,
+         "acc_stderr": 0.04760952285695237,
+         "acc_norm": 0.66,
+         "acc_norm_stderr": 0.04760952285695237
+     },
+     "harness|hendrycksTest-high_school_european_history|5": {
+         "acc": 0.7757575757575758,
+         "acc_stderr": 0.03256866661681102,
+         "acc_norm": 0.7757575757575758,
+         "acc_norm_stderr": 0.03256866661681102
+     },
+     "harness|hendrycksTest-high_school_geography|5": {
+         "acc": 0.7929292929292929,
+         "acc_stderr": 0.028869778460267045,
+         "acc_norm": 0.7929292929292929,
+         "acc_norm_stderr": 0.028869778460267045
+     },
+     "harness|hendrycksTest-high_school_government_and_politics|5": {
+         "acc": 0.9067357512953368,
+         "acc_stderr": 0.020986854593289733,
+         "acc_norm": 0.9067357512953368,
+         "acc_norm_stderr": 0.020986854593289733
+     },
+     "harness|hendrycksTest-high_school_macroeconomics|5": {
+         "acc": 0.6666666666666666,
+         "acc_stderr": 0.023901157979402534,
+         "acc_norm": 0.6666666666666666,
+         "acc_norm_stderr": 0.023901157979402534
+     },
+     "harness|hendrycksTest-high_school_mathematics|5": {
+         "acc": 0.34814814814814815,
+         "acc_stderr": 0.02904560029061625,
+         "acc_norm": 0.34814814814814815,
+         "acc_norm_stderr": 0.02904560029061625
+     },
+     "harness|hendrycksTest-high_school_microeconomics|5": {
+         "acc": 0.6764705882352942,
+         "acc_stderr": 0.030388353551886793,
+         "acc_norm": 0.6764705882352942,
+         "acc_norm_stderr": 0.030388353551886793
+     },
+     "harness|hendrycksTest-high_school_physics|5": {
+         "acc": 0.36423841059602646,
+         "acc_stderr": 0.03929111781242742,
+         "acc_norm": 0.36423841059602646,
+         "acc_norm_stderr": 0.03929111781242742
+     },
+     "harness|hendrycksTest-high_school_psychology|5": {
+         "acc": 0.8366972477064221,
+         "acc_stderr": 0.01584825580650155,
+         "acc_norm": 0.8366972477064221,
+         "acc_norm_stderr": 0.01584825580650155
+     },
+     "harness|hendrycksTest-high_school_statistics|5": {
+         "acc": 0.5046296296296297,
+         "acc_stderr": 0.03409825519163572,
+         "acc_norm": 0.5046296296296297,
+         "acc_norm_stderr": 0.03409825519163572
+     },
+     "harness|hendrycksTest-high_school_us_history|5": {
+         "acc": 0.8529411764705882,
+         "acc_stderr": 0.024857478080250447,
+         "acc_norm": 0.8529411764705882,
+         "acc_norm_stderr": 0.024857478080250447
+     },
+     "harness|hendrycksTest-high_school_world_history|5": {
+         "acc": 0.8143459915611815,
+         "acc_stderr": 0.025310495376944856,
+         "acc_norm": 0.8143459915611815,
+         "acc_norm_stderr": 0.025310495376944856
+     },
+     "harness|hendrycksTest-human_aging|5": {
+         "acc": 0.6816143497757847,
+         "acc_stderr": 0.03126580522513713,
+         "acc_norm": 0.6816143497757847,
+         "acc_norm_stderr": 0.03126580522513713
+     },
+     "harness|hendrycksTest-human_sexuality|5": {
+         "acc": 0.7862595419847328,
+         "acc_stderr": 0.0359546161177469,
+         "acc_norm": 0.7862595419847328,
+         "acc_norm_stderr": 0.0359546161177469
+     },
+     "harness|hendrycksTest-international_law|5": {
+         "acc": 0.7933884297520661,
+         "acc_stderr": 0.03695980128098824,
+         "acc_norm": 0.7933884297520661,
+         "acc_norm_stderr": 0.03695980128098824
+     },
+     "harness|hendrycksTest-jurisprudence|5": {
+         "acc": 0.7870370370370371,
+         "acc_stderr": 0.0395783547198098,
+         "acc_norm": 0.7870370370370371,
+         "acc_norm_stderr": 0.0395783547198098
+     },
+     "harness|hendrycksTest-logical_fallacies|5": {
+         "acc": 0.7730061349693251,
+         "acc_stderr": 0.03291099578615769,
+         "acc_norm": 0.7730061349693251,
+         "acc_norm_stderr": 0.03291099578615769
+     },
+     "harness|hendrycksTest-machine_learning|5": {
+         "acc": 0.48214285714285715,
+         "acc_stderr": 0.047427623612430116,
+         "acc_norm": 0.48214285714285715,
+         "acc_norm_stderr": 0.047427623612430116
+     },
+     "harness|hendrycksTest-management|5": {
+         "acc": 0.7864077669902912,
+         "acc_stderr": 0.040580420156460344,
+         "acc_norm": 0.7864077669902912,
+         "acc_norm_stderr": 0.040580420156460344
+     },
+     "harness|hendrycksTest-marketing|5": {
+         "acc": 0.8803418803418803,
+         "acc_stderr": 0.021262719400406974,
+         "acc_norm": 0.8803418803418803,
+         "acc_norm_stderr": 0.021262719400406974
+     },
+     "harness|hendrycksTest-medical_genetics|5": {
+         "acc": 0.73,
+         "acc_stderr": 0.0446196043338474,
+         "acc_norm": 0.73,
+         "acc_norm_stderr": 0.0446196043338474
+     },
+     "harness|hendrycksTest-miscellaneous|5": {
+         "acc": 0.8275862068965517,
+         "acc_stderr": 0.013507943909371802,
+         "acc_norm": 0.8275862068965517,
+         "acc_norm_stderr": 0.013507943909371802
+     },
+     "harness|hendrycksTest-moral_disputes|5": {
+         "acc": 0.7543352601156069,
+         "acc_stderr": 0.023176298203992005,
+         "acc_norm": 0.7543352601156069,
+         "acc_norm_stderr": 0.023176298203992005
+     },
+     "harness|hendrycksTest-moral_scenarios|5": {
+         "acc": 0.45027932960893857,
+         "acc_stderr": 0.01663961523684581,
+         "acc_norm": 0.45027932960893857,
+         "acc_norm_stderr": 0.01663961523684581
+     },
+     "harness|hendrycksTest-nutrition|5": {
+         "acc": 0.7254901960784313,
+         "acc_stderr": 0.02555316999182652,
+         "acc_norm": 0.7254901960784313,
+         "acc_norm_stderr": 0.02555316999182652
+     },
+     "harness|hendrycksTest-philosophy|5": {
+         "acc": 0.7138263665594855,
+         "acc_stderr": 0.025670259242188933,
+         "acc_norm": 0.7138263665594855,
+         "acc_norm_stderr": 0.025670259242188933
+     },
+     "harness|hendrycksTest-prehistory|5": {
+         "acc": 0.7561728395061729,
+         "acc_stderr": 0.02389187954195961,
+         "acc_norm": 0.7561728395061729,
+         "acc_norm_stderr": 0.02389187954195961
+     },
+     "harness|hendrycksTest-professional_accounting|5": {
+         "acc": 0.46808510638297873,
+         "acc_stderr": 0.029766675075873866,
+         "acc_norm": 0.46808510638297873,
+         "acc_norm_stderr": 0.029766675075873866
+     },
+     "harness|hendrycksTest-professional_law|5": {
+         "acc": 0.4745762711864407,
+         "acc_stderr": 0.012753716929101004,
+         "acc_norm": 0.4745762711864407,
+         "acc_norm_stderr": 0.012753716929101004
+     },
+     "harness|hendrycksTest-professional_medicine|5": {
+         "acc": 0.6911764705882353,
+         "acc_stderr": 0.02806499816704009,
+         "acc_norm": 0.6911764705882353,
+         "acc_norm_stderr": 0.02806499816704009
+     },
+     "harness|hendrycksTest-professional_psychology|5": {
+         "acc": 0.6748366013071896,
+         "acc_stderr": 0.01895088677080631,
+         "acc_norm": 0.6748366013071896,
+         "acc_norm_stderr": 0.01895088677080631
+     },
+     "harness|hendrycksTest-public_relations|5": {
+         "acc": 0.6545454545454545,
+         "acc_stderr": 0.04554619617541054,
+         "acc_norm": 0.6545454545454545,
+         "acc_norm_stderr": 0.04554619617541054
+     },
+     "harness|hendrycksTest-security_studies|5": {
+         "acc": 0.7346938775510204,
+         "acc_stderr": 0.028263889943784603,
+         "acc_norm": 0.7346938775510204,
+         "acc_norm_stderr": 0.028263889943784603
+     },
+     "harness|hendrycksTest-sociology|5": {
+         "acc": 0.8258706467661692,
+         "acc_stderr": 0.026814951200421603,
+         "acc_norm": 0.8258706467661692,
+         "acc_norm_stderr": 0.026814951200421603
+     },
+     "harness|hendrycksTest-us_foreign_policy|5": {
+         "acc": 0.85,
+         "acc_stderr": 0.03588702812826371,
+         "acc_norm": 0.85,
+         "acc_norm_stderr": 0.03588702812826371
+     },
+     "harness|hendrycksTest-virology|5": {
+         "acc": 0.5602409638554217,
+         "acc_stderr": 0.03864139923699122,
+         "acc_norm": 0.5602409638554217,
+         "acc_norm_stderr": 0.03864139923699122
+     },
+     "harness|hendrycksTest-world_religions|5": {
+         "acc": 0.8421052631578947,
+         "acc_stderr": 0.027966785859160893,
+         "acc_norm": 0.8421052631578947,
+         "acc_norm_stderr": 0.027966785859160893
+     },
+     "harness|truthfulqa:mc|0": {
+         "mc1": 0.5104039167686658,
+         "mc1_stderr": 0.017499711430249268,
+         "mc2": 0.6556430108444109,
+         "mc2_stderr": 0.015519025079862213
+     },
+     "harness|winogrande|5": {
+         "acc": 0.8516179952644041,
+         "acc_stderr": 0.009990706005184136
+     },
+     "harness|gsm8k|5": {
+         "acc": 0.7338893100833965,
+         "acc_stderr": 0.012172750939040328
+     }
+ }
+ ```
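
The card above describes the model as a merge of top 7B models with SLERP applied to others. Spherical linear interpolation blends two parent checkpoints along the arc between their weight vectors rather than along a straight line. The sketch below is a generic illustration of that idea, not mergekit's actual implementation; the `slerp` helper, the `t = 0.5` blend factor, and the state-dict usage in the trailing comment are assumptions made for the example.

```python
import torch


def slerp(t: float, a: torch.Tensor, b: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
    """Spherical linear interpolation between two weight tensors.

    Treats each tensor as one flattened vector, interpolates along the great
    circle between them, and falls back to plain linear interpolation when
    the vectors are nearly parallel.
    """
    a_flat, b_flat = a.flatten().float(), b.flatten().float()
    a_unit = a_flat / (a_flat.norm() + eps)
    b_unit = b_flat / (b_flat.norm() + eps)
    dot = torch.clamp(torch.dot(a_unit, b_unit), -1.0, 1.0)
    omega = torch.arccos(dot)              # angle between the two directions
    if omega.abs() < 1e-4:                 # nearly colinear: lerp is stable
        merged = (1.0 - t) * a_flat + t * b_flat
    else:
        sin_omega = torch.sin(omega)
        merged = (torch.sin((1.0 - t) * omega) / sin_omega) * a_flat \
               + (torch.sin(t * omega) / sin_omega) * b_flat
    return merged.reshape(a.shape).to(a.dtype)


# Hypothetical usage on two state dicts loaded from the parent checkpoints:
# merged_sd = {k: slerp(0.5, sd_a[k], sd_b[k]) for k in sd_a}
```

The actual merge was produced with mergekit, which also handles the out-of-core execution described in the quoted blurb rather than loading full state dicts as this sketch implies.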
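
The raw harness output added above can also be summarized programmatically. A minimal sketch, assuming the JSON block has been saved to a local file; the `results.json` filename is only a placeholder:

```python
import json

# Load the eval block shown above (saved locally as an example).
with open("results.json") as f:
    results = json.load(f)

# Average acc_norm over the MMLU (hendrycksTest) subtasks.
mmlu = [v["acc_norm"] for k, v in results.items()
        if k.startswith("harness|hendrycksTest-")]
print(f"MMLU (acc_norm, {len(mmlu)} tasks): {sum(mmlu) / len(mmlu):.4f}")

# Headline numbers reported directly by the harness.
print("ARC acc_norm:      ", results["harness|arc:challenge|25"]["acc_norm"])
print("HellaSwag acc_norm:", results["harness|hellaswag|10"]["acc_norm"])
print("TruthfulQA mc2:    ", results["harness|truthfulqa:mc|0"]["mc2"])
print("Winogrande acc:    ", results["harness|winogrande|5"]["acc"])
print("GSM8K acc:         ", results["harness|gsm8k|5"]["acc"])
```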