Spaces:
AIR-Bench
/
Running on CPU Upgrade

File size: 3,599 Bytes
8b7a945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
from dataclasses import dataclass
from enum import Enum


def get_safe_name(name: str):
    """Normalize *name* into a lowercase identifier-like key.

    Hyphens are turned into underscores, every character that is neither
    alphanumeric (per ``str.isalnum``, so non-ASCII letters/digits are kept)
    nor an underscore is dropped, and the remainder is lowercased.

    NOTE(review): the result may contain underscores, so despite what the
    previous docstring said it is not an RFC 1123 label.
    """
    underscored = name.replace('-', '_')
    kept = [ch.lower() for ch in underscored if ch == '_' or ch.isalnum()]
    return ''.join(kept)


# Benchmark datasets, nested as task -> domain -> language -> dataset names.
# Insertion order is significant: the benchmark table below is built by
# iterating this mapping.
dataset_dict = {
    "qa": {
        "wiki": {
            "en": ["wikipedia_20240101"],
            "zh": ["wikipedia_20240101"],
        },
        "web": {
            "en": ["mC4"],
            "zh": ["mC4"],
        },
        "news": {
            "en": ["CC-News"],
            "zh": ["CC-News"],
        },
        "health": {
            "en": ["PubMedQA"],
            "zh": ["Huatuo-26M"],
        },
        "law": {
            "en": ["pile-of-law"],
            "zh": ["flk_npc_gov_cn"],
        },
        "finance": {
            "en": ["Reuters-Financial"],
            "zh": ["FinCorpus"],
        },
        "arxiv": {
            "en": ["Arxiv"],
        },
    },
    "long_doc": {
        "arxiv": {
            "en": [
                "gpt-3",
                "llama2",
                "llm-survey",
                "gemini",
            ],
        },
        "book": {
            "en": [
                "origin-of-species_darwin",
                "a-brief-history-of-time_stephen-hawking",
            ],
        },
        "healthcare": {
            "en": [
                "pubmed_100K-200K_1",
                "pubmed_100K-200K_2",
                "pubmed_100K-200K_3",
                "pubmed_40K-50K_5-merged",
                "pubmed_30K-40K_10-merged",
            ],
        },
        "law": {
            "en": [
                "lex_files_300K-400K",
                "lex_files_400K-500K",
                "lex_files_500K-600K",
                "lex_files_600K-700K",
            ],
        },
    },
}

# Retrieval metric keys: five metric families, each at the same six cutoffs.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously fused "recall_at_10" and
# "recall_at_100" into a single bogus entry.
metric_list = [
    "ndcg_at_1",
    "ndcg_at_3",
    "ndcg_at_5",
    "ndcg_at_10",
    "ndcg_at_100",
    "ndcg_at_1000",
    "map_at_1",
    "map_at_3",
    "map_at_5",
    "map_at_10",
    "map_at_100",
    "map_at_1000",
    "recall_at_1",
    "recall_at_3",
    "recall_at_5",
    "recall_at_10",
    "recall_at_100",
    "recall_at_1000",
    "precision_at_1",
    "precision_at_3",
    "precision_at_5",
    "precision_at_10",
    "precision_at_100",
    "precision_at_1000",
    "mrr_at_1",
    "mrr_at_3",
    "mrr_at_5",
    "mrr_at_10",
    "mrr_at_100",
    "mrr_at_1000",
]


@dataclass
class Benchmark:
    """Record describing a single leaderboard benchmark.

    Fields, in positional order:
        name: task key in the json file, of the form
            [task]_[domain]_[language]_[metric].
        metric: metric key in the json file, e.g. ``ndcg_at_1``.
        col_name: name to display in the leaderboard, of the form
            [domain]_[language].
    """

    name: str
    metric: str
    col_name: str

# Build the benchmark table (safe name -> Benchmark) by walking
# task -> domain -> language in dataset_dict, then expose it as an Enum.
benchmark_dict = {}
for task, domain_dict in dataset_dict.items():
    for domain, lang_dict in domain_dict.items():
        for lang, dataset_list in lang_dict.items():
            if task == "long_doc":
                # One entry per (dataset, metric) pair; the display column is
                # shared by all metrics of the same dataset.
                for dataset in dataset_list:
                    col_name = f"{domain}_{lang}_{dataset}"
                    for metric in metric_list:
                        benchmark_name = get_safe_name(
                            f"{task}_{domain}_{lang}_{dataset}_{metric}")
                        benchmark_dict[benchmark_name] = Benchmark(
                            benchmark_name, metric, col_name)
            elif task == "qa":
                # One entry per (domain, language); the key does not include
                # the dataset, so with several datasets the last one wins.
                benchmark_name = get_safe_name(f"{task}_{domain}_{lang}")
                col_name = f"{domain}_{lang}"
                for dataset in dataset_list:
                    # NOTE(review): the dataset name is what gets stored in
                    # Benchmark.metric here, whereas the long_doc branch
                    # stores a metric_list entry. Confirm which is intended.
                    benchmark_dict[benchmark_name] = Benchmark(
                        benchmark_name, dataset, col_name)

# Member names are the safe benchmark names; member values are the Benchmark
# records.
Benchmarks = Enum('Benchmarks', benchmark_dict)