MaziyarPanahi committed on
Commit
2a99342
1 Parent(s): d7a439c

Update README.md (#1)

Browse files

- Update README.md (ca3db24dd2e087e0e9fc503cb9c5e4e4771979bd)

Files changed (1) hide show
  1. README.md +387 -1
README.md CHANGED
@@ -8,4 +8,390 @@ tags:
8
 
9
  Merge of top 7B models with DARE method
10
 
11
- > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  Merge of top 7B models with DARE method
10
 
11
+ > mergekit is a toolkit for merging pre-trained language models. mergekit uses an out-of-core approach to perform unreasonably elaborate merges in resource-constrained situations. Merges can be run entirely on CPU or accelerated with as little as 8 GB of VRAM. Many merging algorithms are supported, with more coming as they catch my attention.
12
+
13
+ ## Eval
14
+
15
+ ```json
16
+ {
17
+ "all": {
18
+ "acc": 0.6547370286177235,
19
+ "acc_stderr": 0.03204709242170183,
20
+ "acc_norm": 0.6537337854798912,
21
+ "acc_norm_stderr": 0.03272317883588649,
22
+ "mc1": 0.5189718482252142,
23
+ "mc1_stderr": 0.01749089640576236,
24
+ "mc2": 0.6631825155680797,
25
+ "mc2_stderr": 0.01527641053841743
26
+ },
27
+ "harness|arc:challenge|25": {
28
+ "acc": 0.6953924914675768,
29
+ "acc_stderr": 0.013449522109932485,
30
+ "acc_norm": 0.7175767918088737,
31
+ "acc_norm_stderr": 0.013155456884097225
32
+ },
33
+ "harness|hellaswag|10": {
34
+ "acc": 0.7120095598486357,
35
+ "acc_stderr": 0.004519011688417168,
36
+ "acc_norm": 0.8820952001593309,
37
+ "acc_norm_stderr": 0.003218362717491129
38
+ },
39
+ "harness|hendrycksTest-abstract_algebra|5": {
40
+ "acc": 0.33,
41
+ "acc_stderr": 0.047258156262526045,
42
+ "acc_norm": 0.33,
43
+ "acc_norm_stderr": 0.047258156262526045
44
+ },
45
+ "harness|hendrycksTest-anatomy|5": {
46
+ "acc": 0.6296296296296297,
47
+ "acc_stderr": 0.041716541613545426,
48
+ "acc_norm": 0.6296296296296297,
49
+ "acc_norm_stderr": 0.041716541613545426
50
+ },
51
+ "harness|hendrycksTest-astronomy|5": {
52
+ "acc": 0.7105263157894737,
53
+ "acc_stderr": 0.03690677986137283,
54
+ "acc_norm": 0.7105263157894737,
55
+ "acc_norm_stderr": 0.03690677986137283
56
+ },
57
+ "harness|hendrycksTest-business_ethics|5": {
58
+ "acc": 0.64,
59
+ "acc_stderr": 0.04824181513244218,
60
+ "acc_norm": 0.64,
61
+ "acc_norm_stderr": 0.04824181513244218
62
+ },
63
+ "harness|hendrycksTest-clinical_knowledge|5": {
64
+ "acc": 0.7056603773584905,
65
+ "acc_stderr": 0.02804918631569525,
66
+ "acc_norm": 0.7056603773584905,
67
+ "acc_norm_stderr": 0.02804918631569525
68
+ },
69
+ "harness|hendrycksTest-college_biology|5": {
70
+ "acc": 0.7638888888888888,
71
+ "acc_stderr": 0.03551446610810826,
72
+ "acc_norm": 0.7638888888888888,
73
+ "acc_norm_stderr": 0.03551446610810826
74
+ },
75
+ "harness|hendrycksTest-college_chemistry|5": {
76
+ "acc": 0.48,
77
+ "acc_stderr": 0.050211673156867795,
78
+ "acc_norm": 0.48,
79
+ "acc_norm_stderr": 0.050211673156867795
80
+ },
81
+ "harness|hendrycksTest-college_computer_science|5": {
82
+ "acc": 0.54,
83
+ "acc_stderr": 0.05009082659620333,
84
+ "acc_norm": 0.54,
85
+ "acc_norm_stderr": 0.05009082659620333
86
+ },
87
+ "harness|hendrycksTest-college_mathematics|5": {
88
+ "acc": 0.31,
89
+ "acc_stderr": 0.04648231987117316,
90
+ "acc_norm": 0.31,
91
+ "acc_norm_stderr": 0.04648231987117316
92
+ },
93
+ "harness|hendrycksTest-college_medicine|5": {
94
+ "acc": 0.6705202312138728,
95
+ "acc_stderr": 0.03583901754736411,
96
+ "acc_norm": 0.6705202312138728,
97
+ "acc_norm_stderr": 0.03583901754736411
98
+ },
99
+ "harness|hendrycksTest-college_physics|5": {
100
+ "acc": 0.4215686274509804,
101
+ "acc_stderr": 0.04913595201274498,
102
+ "acc_norm": 0.4215686274509804,
103
+ "acc_norm_stderr": 0.04913595201274498
104
+ },
105
+ "harness|hendrycksTest-computer_security|5": {
106
+ "acc": 0.78,
107
+ "acc_stderr": 0.04163331998932263,
108
+ "acc_norm": 0.78,
109
+ "acc_norm_stderr": 0.04163331998932263
110
+ },
111
+ "harness|hendrycksTest-conceptual_physics|5": {
112
+ "acc": 0.5787234042553191,
113
+ "acc_stderr": 0.03227834510146268,
114
+ "acc_norm": 0.5787234042553191,
115
+ "acc_norm_stderr": 0.03227834510146268
116
+ },
117
+ "harness|hendrycksTest-econometrics|5": {
118
+ "acc": 0.5,
119
+ "acc_stderr": 0.047036043419179864,
120
+ "acc_norm": 0.5,
121
+ "acc_norm_stderr": 0.047036043419179864
122
+ },
123
+ "harness|hendrycksTest-electrical_engineering|5": {
124
+ "acc": 0.5586206896551724,
125
+ "acc_stderr": 0.04137931034482758,
126
+ "acc_norm": 0.5586206896551724,
127
+ "acc_norm_stderr": 0.04137931034482758
128
+ },
129
+ "harness|hendrycksTest-elementary_mathematics|5": {
130
+ "acc": 0.42857142857142855,
131
+ "acc_stderr": 0.02548718714785938,
132
+ "acc_norm": 0.42857142857142855,
133
+ "acc_norm_stderr": 0.02548718714785938
134
+ },
135
+ "harness|hendrycksTest-formal_logic|5": {
136
+ "acc": 0.47619047619047616,
137
+ "acc_stderr": 0.04467062628403273,
138
+ "acc_norm": 0.47619047619047616,
139
+ "acc_norm_stderr": 0.04467062628403273
140
+ },
141
+ "harness|hendrycksTest-global_facts|5": {
142
+ "acc": 0.33,
143
+ "acc_stderr": 0.04725815626252604,
144
+ "acc_norm": 0.33,
145
+ "acc_norm_stderr": 0.04725815626252604
146
+ },
147
+ "harness|hendrycksTest-high_school_biology|5": {
148
+ "acc": 0.7903225806451613,
149
+ "acc_stderr": 0.023157879349083525,
150
+ "acc_norm": 0.7903225806451613,
151
+ "acc_norm_stderr": 0.023157879349083525
152
+ },
153
+ "harness|hendrycksTest-high_school_chemistry|5": {
154
+ "acc": 0.4876847290640394,
155
+ "acc_stderr": 0.035169204442208966,
156
+ "acc_norm": 0.4876847290640394,
157
+ "acc_norm_stderr": 0.035169204442208966
158
+ },
159
+ "harness|hendrycksTest-high_school_computer_science|5": {
160
+ "acc": 0.68,
161
+ "acc_stderr": 0.04688261722621505,
162
+ "acc_norm": 0.68,
163
+ "acc_norm_stderr": 0.04688261722621505
164
+ },
165
+ "harness|hendrycksTest-high_school_european_history|5": {
166
+ "acc": 0.7878787878787878,
167
+ "acc_stderr": 0.03192271569548301,
168
+ "acc_norm": 0.7878787878787878,
169
+ "acc_norm_stderr": 0.03192271569548301
170
+ },
171
+ "harness|hendrycksTest-high_school_geography|5": {
172
+ "acc": 0.797979797979798,
173
+ "acc_stderr": 0.02860620428922987,
174
+ "acc_norm": 0.797979797979798,
175
+ "acc_norm_stderr": 0.02860620428922987
176
+ },
177
+ "harness|hendrycksTest-high_school_government_and_politics|5": {
178
+ "acc": 0.9015544041450777,
179
+ "acc_stderr": 0.021500249576033456,
180
+ "acc_norm": 0.9015544041450777,
181
+ "acc_norm_stderr": 0.021500249576033456
182
+ },
183
+ "harness|hendrycksTest-high_school_macroeconomics|5": {
184
+ "acc": 0.6666666666666666,
185
+ "acc_stderr": 0.023901157979402538,
186
+ "acc_norm": 0.6666666666666666,
187
+ "acc_norm_stderr": 0.023901157979402538
188
+ },
189
+ "harness|hendrycksTest-high_school_mathematics|5": {
190
+ "acc": 0.35185185185185186,
191
+ "acc_stderr": 0.029116617606083008,
192
+ "acc_norm": 0.35185185185185186,
193
+ "acc_norm_stderr": 0.029116617606083008
194
+ },
195
+ "harness|hendrycksTest-high_school_microeconomics|5": {
196
+ "acc": 0.6722689075630253,
197
+ "acc_stderr": 0.03048991141767323,
198
+ "acc_norm": 0.6722689075630253,
199
+ "acc_norm_stderr": 0.03048991141767323
200
+ },
201
+ "harness|hendrycksTest-high_school_physics|5": {
202
+ "acc": 0.36423841059602646,
203
+ "acc_stderr": 0.03929111781242742,
204
+ "acc_norm": 0.36423841059602646,
205
+ "acc_norm_stderr": 0.03929111781242742
206
+ },
207
+ "harness|hendrycksTest-high_school_psychology|5": {
208
+ "acc": 0.8440366972477065,
209
+ "acc_stderr": 0.015555802713590167,
210
+ "acc_norm": 0.8440366972477065,
211
+ "acc_norm_stderr": 0.015555802713590167
212
+ },
213
+ "harness|hendrycksTest-high_school_statistics|5": {
214
+ "acc": 0.5092592592592593,
215
+ "acc_stderr": 0.034093869469927006,
216
+ "acc_norm": 0.5092592592592593,
217
+ "acc_norm_stderr": 0.034093869469927006
218
+ },
219
+ "harness|hendrycksTest-high_school_us_history|5": {
220
+ "acc": 0.8333333333333334,
221
+ "acc_stderr": 0.026156867523931045,
222
+ "acc_norm": 0.8333333333333334,
223
+ "acc_norm_stderr": 0.026156867523931045
224
+ },
225
+ "harness|hendrycksTest-high_school_world_history|5": {
226
+ "acc": 0.7848101265822784,
227
+ "acc_stderr": 0.02675082699467618,
228
+ "acc_norm": 0.7848101265822784,
229
+ "acc_norm_stderr": 0.02675082699467618
230
+ },
231
+ "harness|hendrycksTest-human_aging|5": {
232
+ "acc": 0.6905829596412556,
233
+ "acc_stderr": 0.03102441174057221,
234
+ "acc_norm": 0.6905829596412556,
235
+ "acc_norm_stderr": 0.03102441174057221
236
+ },
237
+ "harness|hendrycksTest-human_sexuality|5": {
238
+ "acc": 0.7786259541984732,
239
+ "acc_stderr": 0.03641297081313729,
240
+ "acc_norm": 0.7786259541984732,
241
+ "acc_norm_stderr": 0.03641297081313729
242
+ },
243
+ "harness|hendrycksTest-international_law|5": {
244
+ "acc": 0.7933884297520661,
245
+ "acc_stderr": 0.03695980128098824,
246
+ "acc_norm": 0.7933884297520661,
247
+ "acc_norm_stderr": 0.03695980128098824
248
+ },
249
+ "harness|hendrycksTest-jurisprudence|5": {
250
+ "acc": 0.7870370370370371,
251
+ "acc_stderr": 0.0395783547198098,
252
+ "acc_norm": 0.7870370370370371,
253
+ "acc_norm_stderr": 0.0395783547198098
254
+ },
255
+ "harness|hendrycksTest-logical_fallacies|5": {
256
+ "acc": 0.7730061349693251,
257
+ "acc_stderr": 0.03291099578615769,
258
+ "acc_norm": 0.7730061349693251,
259
+ "acc_norm_stderr": 0.03291099578615769
260
+ },
261
+ "harness|hendrycksTest-machine_learning|5": {
262
+ "acc": 0.45535714285714285,
263
+ "acc_stderr": 0.047268355537191,
264
+ "acc_norm": 0.45535714285714285,
265
+ "acc_norm_stderr": 0.047268355537191
266
+ },
267
+ "harness|hendrycksTest-management|5": {
268
+ "acc": 0.7766990291262136,
269
+ "acc_stderr": 0.04123553189891431,
270
+ "acc_norm": 0.7766990291262136,
271
+ "acc_norm_stderr": 0.04123553189891431
272
+ },
273
+ "harness|hendrycksTest-marketing|5": {
274
+ "acc": 0.8760683760683761,
275
+ "acc_stderr": 0.021586494001281376,
276
+ "acc_norm": 0.8760683760683761,
277
+ "acc_norm_stderr": 0.021586494001281376
278
+ },
279
+ "harness|hendrycksTest-medical_genetics|5": {
280
+ "acc": 0.72,
281
+ "acc_stderr": 0.045126085985421276,
282
+ "acc_norm": 0.72,
283
+ "acc_norm_stderr": 0.045126085985421276
284
+ },
285
+ "harness|hendrycksTest-miscellaneous|5": {
286
+ "acc": 0.8275862068965517,
287
+ "acc_stderr": 0.013507943909371798,
288
+ "acc_norm": 0.8275862068965517,
289
+ "acc_norm_stderr": 0.013507943909371798
290
+ },
291
+ "harness|hendrycksTest-moral_disputes|5": {
292
+ "acc": 0.7427745664739884,
293
+ "acc_stderr": 0.02353292543104429,
294
+ "acc_norm": 0.7427745664739884,
295
+ "acc_norm_stderr": 0.02353292543104429
296
+ },
297
+ "harness|hendrycksTest-moral_scenarios|5": {
298
+ "acc": 0.4312849162011173,
299
+ "acc_stderr": 0.016563829399047707,
300
+ "acc_norm": 0.4312849162011173,
301
+ "acc_norm_stderr": 0.016563829399047707
302
+ },
303
+ "harness|hendrycksTest-nutrition|5": {
304
+ "acc": 0.7320261437908496,
305
+ "acc_stderr": 0.025360603796242557,
306
+ "acc_norm": 0.7320261437908496,
307
+ "acc_norm_stderr": 0.025360603796242557
308
+ },
309
+ "harness|hendrycksTest-philosophy|5": {
310
+ "acc": 0.7170418006430869,
311
+ "acc_stderr": 0.02558306248998481,
312
+ "acc_norm": 0.7170418006430869,
313
+ "acc_norm_stderr": 0.02558306248998481
314
+ },
315
+ "harness|hendrycksTest-prehistory|5": {
316
+ "acc": 0.7438271604938271,
317
+ "acc_stderr": 0.024288533637726095,
318
+ "acc_norm": 0.7438271604938271,
319
+ "acc_norm_stderr": 0.024288533637726095
320
+ },
321
+ "harness|hendrycksTest-professional_accounting|5": {
322
+ "acc": 0.46808510638297873,
323
+ "acc_stderr": 0.029766675075873866,
324
+ "acc_norm": 0.46808510638297873,
325
+ "acc_norm_stderr": 0.029766675075873866
326
+ },
327
+ "harness|hendrycksTest-professional_law|5": {
328
+ "acc": 0.4726205997392438,
329
+ "acc_stderr": 0.012751075788015055,
330
+ "acc_norm": 0.4726205997392438,
331
+ "acc_norm_stderr": 0.012751075788015055
332
+ },
333
+ "harness|hendrycksTest-professional_medicine|5": {
334
+ "acc": 0.6801470588235294,
335
+ "acc_stderr": 0.02833295951403121,
336
+ "acc_norm": 0.6801470588235294,
337
+ "acc_norm_stderr": 0.02833295951403121
338
+ },
339
+ "harness|hendrycksTest-professional_psychology|5": {
340
+ "acc": 0.6748366013071896,
341
+ "acc_stderr": 0.018950886770806315,
342
+ "acc_norm": 0.6748366013071896,
343
+ "acc_norm_stderr": 0.018950886770806315
344
+ },
345
+ "harness|hendrycksTest-public_relations|5": {
346
+ "acc": 0.6909090909090909,
347
+ "acc_stderr": 0.044262946482000985,
348
+ "acc_norm": 0.6909090909090909,
349
+ "acc_norm_stderr": 0.044262946482000985
350
+ },
351
+ "harness|hendrycksTest-security_studies|5": {
352
+ "acc": 0.7306122448979592,
353
+ "acc_stderr": 0.02840125202902294,
354
+ "acc_norm": 0.7306122448979592,
355
+ "acc_norm_stderr": 0.02840125202902294
356
+ },
357
+ "harness|hendrycksTest-sociology|5": {
358
+ "acc": 0.835820895522388,
359
+ "acc_stderr": 0.026193923544454115,
360
+ "acc_norm": 0.835820895522388,
361
+ "acc_norm_stderr": 0.026193923544454115
362
+ },
363
+ "harness|hendrycksTest-us_foreign_policy|5": {
364
+ "acc": 0.85,
365
+ "acc_stderr": 0.03588702812826371,
366
+ "acc_norm": 0.85,
367
+ "acc_norm_stderr": 0.03588702812826371
368
+ },
369
+ "harness|hendrycksTest-virology|5": {
370
+ "acc": 0.5602409638554217,
371
+ "acc_stderr": 0.03864139923699122,
372
+ "acc_norm": 0.5602409638554217,
373
+ "acc_norm_stderr": 0.03864139923699122
374
+ },
375
+ "harness|hendrycksTest-world_religions|5": {
376
+ "acc": 0.8362573099415205,
377
+ "acc_stderr": 0.028380919596145866,
378
+ "acc_norm": 0.8362573099415205,
379
+ "acc_norm_stderr": 0.028380919596145866
380
+ },
381
+ "harness|truthfulqa:mc|0": {
382
+ "mc1": 0.5189718482252142,
383
+ "mc1_stderr": 0.01749089640576236,
384
+ "mc2": 0.6631825155680797,
385
+ "mc2_stderr": 0.01527641053841743
386
+ },
387
+ "harness|winogrande|5": {
388
+ "acc": 0.8437253354380426,
389
+ "acc_stderr": 0.01020535179187352
390
+ },
391
+ "harness|gsm8k|5": {
392
+ "acc": 0.7172100075815011,
393
+ "acc_stderr": 0.012405020417873619
394
+ }
395
+ }
396
+
397
+ ```