Yaofu3 committed on
Commit
1c22d8d
1 Parent(s): 7c45643

support selecting inference framework

Summary: adds an InferenceFramework enum plus a leaderboard filter and submission dropdown for it, threads the selection through EvalRequest into lm-eval's evaluator.simple_evaluate, and records it in the results config and the leaderboard table.

app.py CHANGED
@@ -33,6 +33,7 @@ from src.display.utils import (
     TYPES,
     AutoEvalColumn,
     ModelType,
+    InferenceFramework,
     fields,
     WeightType,
     Precision,
@@ -183,6 +184,14 @@ with demo:
             )

             with gr.Column(min_width=320):
+                filter_columns_size = gr.CheckboxGroup(
+                    label="Inference frameworks",
+                    choices=[t.to_str() for t in InferenceFramework],
+                    value=[t.to_str() for t in InferenceFramework],
+                    interactive=True,
+                    elem_id="filter-columns-size",
+                )
+
                 filter_columns_type = gr.CheckboxGroup(
                     label="Model types",
                     choices=[t.to_str() for t in ModelType],
@@ -199,13 +208,13 @@ with demo:
                     elem_id="filter-columns-precision",
                 )

-                filter_columns_size = gr.CheckboxGroup(
-                    label="Model sizes (in billions of parameters)",
-                    choices=list(NUMERIC_INTERVALS.keys()),
-                    value=list(NUMERIC_INTERVALS.keys()),
-                    interactive=True,
-                    elem_id="filter-columns-size",
-                )
+                # filter_columns_size = gr.CheckboxGroup(
+                #     label="Model sizes (in billions of parameters)",
+                #     choices=list(NUMERIC_INTERVALS.keys()),
+                #     value=list(NUMERIC_INTERVALS.keys()),
+                #     interactive=True,
+                #     elem_id="filter-columns-size",
+                # )

                 # breakpoint()

@@ -308,6 +317,15 @@ with demo:
     with gr.Row():
         gr.Markdown("# Submit your model here", elem_classes="markdown-text")

+    with gr.Row():
+        inference_framework = gr.Dropdown(
+            choices=[t.to_str() for t in InferenceFramework],
+            label="Inference framework",
+            multiselect=False,
+            value=None,
+            interactive=True,
+        )
+
     with gr.Row():
         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
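Note: the new checkbox group reuses the filter_columns_size variable and the "filter-columns-size" elem_id from the now commented-out model-size filter even though it filters by inference framework; renaming both would make the intent clearer. Below is a minimal, self-contained sketch of the same Gradio pattern, with a hypothetical filter_rows callback standing in for the leaderboard's table-update logic:

# Minimal sketch of the filter pattern above (hypothetical demo, not the
# leaderboard's actual wiring; assumes gradio is installed).
import gradio as gr

FRAMEWORKS = ["MoE-Infinity", "HF-Chat"]  # mirrors InferenceFramework.to_str()

def filter_rows(selected):
    # The real app re-filters the leaderboard DataFrame here; we just echo.
    return "Showing rows for: " + (", ".join(selected) or "none")

with gr.Blocks() as demo:
    boxes = gr.CheckboxGroup(
        label="Inference frameworks",
        choices=FRAMEWORKS,
        value=FRAMEWORKS,  # everything visible by default
        interactive=True,
    )
    status = gr.Markdown()
    boxes.change(filter_rows, inputs=boxes, outputs=status)

# demo.launch()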
src/backend/manage_requests.py CHANGED
@@ -16,6 +16,7 @@ class EvalRequest:
     json_filepath: str
     weight_type: str = "Original"
     model_type: str = "" # pretrained, finetuned, with RL
+    inference_framework: str = "HF-Chat"
     precision: str = "" # float16, bfloat16
     base_model: Optional[str] = None # for adapter models
     revision: str = "main" # commit
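Note: the new field defaults to "HF-Chat", so request files written before this commit still deserialize cleanly. A small sketch of that fallback behavior, using a hypothetical trimmed-down dataclass:

from dataclasses import dataclass

@dataclass
class EvalRequest:  # trimmed to the fields relevant here
    model: str
    inference_framework: str = "HF-Chat"

# Hypothetical pre-commit request payload: no inference_framework key.
legacy = {"model": "org/legacy-model"}
req = EvalRequest(**legacy)
assert req.inference_framework == "HF-Chat"  # missing key -> dataclass default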
src/backend/run_eval_suite.py CHANGED
@@ -42,13 +42,13 @@ def run_evaluation(
     # task_names = utils.pattern_match(task_names, tasks.ALL_TASKS)

     print(f"Selected Tasks: {task_names}")
-    print(f"Eval Request: {eval_request.get_model_args()}")
+    print(f"Eval Request: {eval_request}")
     print(
         f"Num Fewshot: {num_fewshot}, Batch Size: {batch_size}, Device: {device}, Use Cache: {use_cache}, Limit: {limit}"
     )
     # hf-chat is implemented to use apply_chat_template
     results = evaluator.simple_evaluate(
-        model="moe-infinity", # "hf-causal-experimental", # "hf-causal", hf-chat
+        model=eval_request.inference_framework, # "hf-causal-experimental", # "hf-causal", hf-chat
         model_args=eval_request.get_model_args(),
         tasks=task_names,
         num_fewshot=num_fewshot,
@@ -65,6 +65,7 @@ def run_evaluation(
     results["config"]["model_dtype"] = eval_request.precision
     results["config"]["model_name"] = eval_request.model
     results["config"]["model_sha"] = eval_request.revision
+    results["config"]["inference_framework"] = eval_request.inference_framework

     if max_nb_samples is not None:
         if "samples" in results:
src/display/utils.py CHANGED
@@ -70,6 +70,9 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 # #Scores
 # # auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Avg", "number", True)])

+# Inference framework
+auto_eval_column_dict.append(["inference_framework", ColumnContent, ColumnContent("Inference framework", "str", True)])
+
 for task in Tasks:
     auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(task.value.col_name, "number", True)])

@@ -129,6 +132,24 @@ class ModelType(Enum):
         return ModelType.Unknown


+class InferenceFramework(Enum):
+    # "moe-infinity", hf-chat
+    MoE_Infinity = ModelDetails("MoE-Infinity")
+    HF_Chat = ModelDetails("HF-Chat")
+    Unknown = ModelDetails("?")
+
+    def to_str(self):
+        return self.value.name
+
+    @staticmethod
+    def from_str(inference_framework: str):
+        if inference_framework in ["moe-infinity"]:
+            return InferenceFramework.MoE_Infinity
+        if inference_framework in ["hf-chat"]:
+            return InferenceFramework.HF_Chat
+        return InferenceFramework.Unknown
+
+
 class WeightType(Enum):
     Adapter = ModelDetails("Adapter")
     Original = ModelDetails("Original")
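Note: InferenceFramework mirrors the existing ModelType pattern: to_str() returns the ModelDetails display name, and from_str() maps the lowercase backend identifiers back to members, falling back to Unknown rather than raising. A quick usage sketch (assuming the repo root is on sys.path):

from src.display.utils import InferenceFramework

assert InferenceFramework.from_str("moe-infinity").to_str() == "MoE-Infinity"
assert InferenceFramework.from_str("hf-chat").to_str() == "HF-Chat"
# Anything unmapped degrades to Unknown instead of raising.
assert InferenceFramework.from_str("vllm") is InferenceFramework.Unknown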
src/leaderboard/read_evals.py CHANGED
@@ -41,6 +41,7 @@ class EvalResult:
     num_params: int = 0
     date: str = "" # submission date of request file
     still_on_hub: bool = False
+    inference_framework: str = "Unknown"

     @staticmethod
     def init_from_json_file(json_filepath, is_backend: bool = False):
@@ -48,6 +49,8 @@ class EvalResult:
         with open(json_filepath) as fp:
             data = json.load(fp)

+        inference_framework = data.get("inference_framework", "Unknown")
+
         # We manage the legacy config format
         config = data.get("config", data.get("config_general", None))

@@ -118,6 +121,7 @@ class EvalResult:
             revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture,
+            inference_framework=inference_framework,
         )

         return res
@@ -136,6 +140,7 @@ class EvalResult:
             self.likes = request.get("likes", 0)
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
+            self.inference_framework = request.get("inference_framework", "Unknown")
         except Exception as e:
             print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path} -- {e}")

@@ -166,6 +171,7 @@ class EvalResult:
             AutoEvalColumn.likes.name: self.likes,
             AutoEvalColumn.params.name: self.num_params,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            AutoEvalColumn.inference_framework.name: self.inference_framework,
         }

         for task in Tasks:
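Note: EvalResult now picks the framework up from two places: the top level of the results JSON in init_from_json_file, and the matching request file in update_with_request_file, both defaulting to "Unknown" for files written before this commit; to_dict then exposes it under the new column. A sketch of how a legacy results file degrades (hypothetical payloads):

import json

new_style = json.loads('{"inference_framework": "MoE-Infinity", "config": {}}')
old_style = json.loads('{"config": {}}')  # results file from before this commit

assert new_style.get("inference_framework", "Unknown") == "MoE-Infinity"
assert old_style.get("inference_framework", "Unknown") == "Unknown"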
src/populate.py CHANGED
@@ -3,6 +3,7 @@ import os
 from tqdm import tqdm
 import copy
 import pandas as pd
+import numpy as np

 from src.display.formatting import has_no_nan_values, make_clickable_model
 from src.display.utils import AutoEvalColumn, EvalQueueColumn
@@ -63,6 +64,9 @@ def get_leaderboard_df(

     # if AutoEvalColumn.average.name in df:
     #     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
+    for col in cols:
+        if col not in df.columns:
+            df[col] = np.nan

     if not df.empty:
         df = df[cols].round(decimals=2)
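Note: the numpy import backs the new guard in get_leaderboard_df: results produced before this commit have no "Inference framework" column, so any expected column missing from the DataFrame is backfilled with NaN before the df[cols] selection, which would otherwise raise a KeyError. The same guard in isolation:

import numpy as np
import pandas as pd

cols = ["Model", "Inference framework"]
df = pd.DataFrame({"Model": ["a", "b"]})  # legacy rows: no framework column

for col in cols:
    if col not in df.columns:
        df[col] = np.nan  # backfill so df[cols] below cannot raise

print(df[cols])
#   Model  Inference framework
# 0     a                  NaN
# 1     b                  NaN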