cruxeval-x / app.py
Ruiyang1's picture
Update space
1a86feb
raw
history blame
No virus
7.42 kB
import json
import gradio as gr
import pandas as pd
from css_html import custom_css
from text_content import ABOUT_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, ACKNOWLEDGEMENT_TEXT, NOTES_TEXT, HEAD_TEXT
from utils import (
AutoEvalColumn,
fields,
lang_map,
)
# Location of the benchmark results file, relative to the working directory.
result_path = './results.json'

# Labels offered in the task selector; `cur_task` / `next_task` are the
# substrings used to pick the matching per-language score keys.
task_type = ["input reasoning", "output reasoning"]
cur_task = "input"
next_task = "output"

# JSON is UTF-8 by specification; pin the encoding so decoding does not
# depend on the platform's default locale.
with open(result_path, 'r', encoding='utf-8') as f:
    data = json.load(f)
def _build_leaderboard(results, task):
    """Build the leaderboard table for one task, best average first.

    Args:
        results: Mapping of model name to an entry holding a ``"pass@1"``
            mapping (score key -> score) and a ``"size"`` value.
        task: Substring identifying the task; only score keys containing it
            are used, and ``f"_{task}"`` is stripped from the key before it
            is translated through ``lang_map`` into a column name.

    Returns:
        A DataFrame with one row per model: one column per language, then
        ``Average`` (rounded to one decimal), ``Model`` and ``Size``, sorted
        by ``Average`` in descending order.
    """
    records = []
    for model_name, entry in results.items():
        record = {
            lang_map[key.replace(f"_{task}", "")]: score
            for key, score in entry["pass@1"].items()
            if task in key
        }
        # A model without any score for this task would otherwise raise
        # ZeroDivisionError; report its average as NaN instead.
        if record:
            average = round(sum(record.values()) / len(record), 1)
        else:
            average = float("nan")
        record['Average'] = average
        record['Model'] = model_name
        record['Size'] = entry['size']
        records.append(record)
    return pd.DataFrame(records).sort_values(by='Average', ascending=False)


# Table shown initially, and the table for the other task kept ready for
# when the task selector changes.
df = _build_leaderboard(data, cur_task)
df_next = _build_leaderboard(data, next_task)
# Column metadata derived from AutoEvalColumn: hidden fields are dropped, and
# the *_LITE variants further keep only fields flagged displayed_by_default.
_visible_fields = [col for col in fields(AutoEvalColumn) if not col.hidden]
_default_fields = [col for col in _visible_fields if col.displayed_by_default]

COLS = [col.name for col in _visible_fields]
TYPES = [col.type for col in _visible_fields]
COLS_LITE = [col.name for col in _default_fields]
TYPES_LITE = [col.type for col in _default_fields]
def select_columns(df, columns):
    """Return ``df`` narrowed to the model and size columns plus ``columns``.

    Args:
        df: Table to filter.
        columns: Names of the optional columns that should be kept.

    Returns:
        The selected columns of ``df``: the model and size columns first,
        then every requested column that exists in ``df``.
    """
    pinned = [AutoEvalColumn.model.name, AutoEvalColumn.size.name]
    # Walk COLS (not `columns`) so the output keeps the canonical column order.
    requested = [name for name in COLS if name in df.columns and name in columns]
    return df[pinned + requested]
def select_tasks(df, columns, df_next):
    """Swap the active and standby tables and filter the new active one.

    Args:
        df: Table that was active before the switch.
        columns: Names of the optional columns that should be shown.
        df_next: Table that becomes active.

    Returns:
        A 3-tuple ``(active, visible, standby)``: the newly active full table,
        that table restricted to the model and size columns plus ``columns``,
        and the previously active table.
    """
    pinned = [AutoEvalColumn.model.name, AutoEvalColumn.size.name]
    active, standby = df_next, df
    # Walk COLS (not `columns`) so the output keeps the canonical column order.
    keep = pinned + [
        name for name in COLS if name in active.columns and name in columns
    ]
    return active, active[keep], standby
# Build the leaderboard UI: a header, an "Evaluation Table" tab with task and
# language selectors driving the visible table, an "About" tab, and collapsible
# citation / acknowledgement sections.
# NOTE(review): the block nesting below was reconstructed; in particular,
# confirm that the citation and acknowledgement rows are meant to sit outside
# the tabs rather than inside the "About" tab.

# Columns that are always displayed and therefore never offered as a choice.
_fixed_cols = [AutoEvalColumn.model.name, AutoEvalColumn.size.name]

demo = gr.Blocks(css=custom_css)
with demo:
    with gr.Column():
        gr.Markdown(
            '<div style="text-align: center;"><h1>CRUXEVAL-X Leaderboard</h1></div><br>',
            elem_classes="markdown-text",
        )
        gr.Markdown(HEAD_TEXT, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.Column():
            with gr.Tabs(elem_classes="A100-tabs") as A100_tabs:
                with gr.TabItem("🔍 Evaluation Table", id=0):
                    with gr.Column():
                        with gr.Accordion("⏬ Tasks", open=True):
                            shown_tasks = gr.Radio(
                                choices=list(task_type),
                                # Preselect the first label matching the
                                # initial task, or nothing if none matches.
                                value=next(
                                    (c for c in task_type if cur_task in c), None
                                ),
                                label="",
                                elem_id="task-select",
                                interactive=True,
                            )
                        with gr.Accordion("⏬ Languages", open=True):
                            shown_languages = gr.CheckboxGroup(
                                choices=[c for c in COLS if c not in _fixed_cols],
                                value=[c for c in COLS_LITE if c not in _fixed_cols],
                                label="",
                                elem_id="column-select",
                                interactive=True,
                            )
                        # Visible table: fixed columns plus the default selection.
                        leaderboard_df = gr.components.Dataframe(
                            value=df[_fixed_cols + shown_languages.value],
                            headers=COLS,
                            datatype=TYPES,
                            elem_id="leaderboard-table",
                            interactive=False,
                        )
                        # Invisible full table for the currently active task;
                        # it is the source that column filtering reads from.
                        hidden_leaderboard_df = gr.components.Dataframe(
                            value=df,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )
                        # Invisible full table for the other task; swapped
                        # with the active one when the task selector changes.
                        leaderboard_next = gr.components.Dataframe(
                            value=df_next,
                            headers=COLS,
                            datatype=["str"] * len(COLS),
                            visible=False,
                        )
                        shown_languages.change(
                            select_columns,
                            [hidden_leaderboard_df, shown_languages],
                            leaderboard_df,
                        )
                        shown_tasks.change(
                            select_tasks,
                            [hidden_leaderboard_df, shown_languages, leaderboard_next],
                            [hidden_leaderboard_df, leaderboard_df, leaderboard_next],
                        )
                        gr.Markdown(NOTES_TEXT, elem_classes="markdown-text")
                with gr.TabItem("📝 About", id=1):
                    gr.Markdown(ABOUT_TEXT, elem_classes="markdown-text")
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=10,
                elem_id="citation-button",
                show_copy_button=True,
            )
    with gr.Row():
        with gr.Accordion("🙏 Acknowledgement", open=False):
            gr.Markdown(ACKNOWLEDGEMENT_TEXT)
demo.launch()