K-Sort-Arena / app.py
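# Entry point for the K-Sort Arena demo: builds the combined Gradio UI
# (image/video generation arenas plus leaderboards) and launches the server.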
import gradio as gr
import os
from serve.gradio_web import *  # provides block_css and build_side_by_side_ui_anony used below
from serve.gradio_web_video import build_side_by_side_video_ui_anony
from serve.leaderboard import build_leaderboard_tab, build_leaderboard_video_tab, build_leaderboard_contributor
from model.model_manager import ModelManager
from pathlib import Path
from serve.constants import SERVER_PORT, ROOT_PATH, ELO_RESULTS_DIR
def make_default_md():
    link_color = "#1976D2"  # This color should be clear in both light and dark mode
    leaderboard_md = f"""
# 🏅 K-Sort Arena: Efficient and Reliable Benchmarking for Generative Models via K-wise Human Preferences
### [Paper](https://arxiv.org/abs/2408.14468) | [Twitter](https://x.com/_akhaliq/status/1828280979242320014)
- ⚡ For vision tasks, K-wise comparisons provide much richer information while taking roughly the same time as pairwise comparisons.
- 🎯 A well-designed matchmaking algorithm saves more human effort than the random pairing used in a standard Arena.
- 📈 Probabilistic modeling converges faster and more stably than the Elo scoring system.
"""
    return leaderboard_md
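# Illustrative sketch (not used by the app): the first bullet in the markdown above
# says a K-wise comparison carries much richer information than a pairwise one in a
# similar amount of annotation time. One way to see this: a single ranking of K
# outputs implies K*(K-1)/2 pairwise outcomes. The helper below is hypothetical and
# exists only to illustrate that point.
def _pairwise_outcomes_from_ranking(ranking):
    """ranking: model names ordered best-to-worst; returns the implied (winner, loser) pairs."""
    return [(winner, loser)
            for i, winner in enumerate(ranking)
            for loser in ranking[i + 1:]]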
def build_combine_demo(models, elo_results_file, leaderboard_table_file):
    with gr.Blocks(
        title="Play with Open Vision Models",
        theme=gr.themes.Default(),
        css=block_css,
    ) as demo:
        with gr.Blocks():
            md = make_default_md()
            md_default = gr.Markdown(md, elem_id="default_leaderboard_markdown")
        with gr.Tabs() as tabs_combine:
            with gr.Tab("Image Generation", id=0):
                with gr.Tabs() as tabs_ig:
                    with gr.Tab("Generation Leaderboard", id=0):
                        build_leaderboard_tab()
                    with gr.Tab("Generation Arena (battle)", id=1):
                        build_side_by_side_ui_anony(models)
            with gr.Tab("Video Generation", id=1):
                with gr.Tabs() as tabs_vg:
                    with gr.Tab("Generation Leaderboard", id=0):
                        build_leaderboard_video_tab()
                    with gr.Tab("Generation Arena (battle)", id=1):
                        build_side_by_side_video_ui_anony(models)
            with gr.Tab("Contributor", id=2):
                build_leaderboard_contributor()
    return demo
def load_elo_results(elo_results_dir):
    from collections import defaultdict
    elo_results_file = defaultdict(lambda: None)
    leaderboard_table_file = defaultdict(lambda: None)
    if elo_results_dir is not None:
        elo_results_dir = Path(elo_results_dir)
        elo_results_file = {}
        leaderboard_table_file = {}
        for file in elo_results_dir.glob('elo_results_*.pkl'):
            if 't2i_generation' in file.name:
                elo_results_file['t2i_generation'] = file
            # else:
            #     raise ValueError(f"Unknown file name: {file.name}")
        for file in elo_results_dir.glob('*_leaderboard.csv'):
            if 't2i_generation' in file.name:
                leaderboard_table_file['t2i_generation'] = file
            # else:
            #     raise ValueError(f"Unknown file name: {file.name}")
    return elo_results_file, leaderboard_table_file
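# Expected contents of ELO_RESULTS_DIR, inferred from the glob patterns above
# (an assumption; only the 't2i_generation' task is picked up at the moment):
#   elo_results_t2i_generation.pkl   -> elo_results_file['t2i_generation']
#   t2i_generation_leaderboard.csv   -> leaderboard_table_file['t2i_generation']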
if __name__ == "__main__":
    server_port = int(SERVER_PORT)
    root_path = ROOT_PATH
    elo_results_dir = ELO_RESULTS_DIR
    models = ModelManager()
    elo_results_file, leaderboard_table_file = load_elo_results(elo_results_dir)
    demo = build_combine_demo(models, elo_results_file, leaderboard_table_file)
    demo.queue(max_size=20).launch(server_port=server_port, root_path=ROOT_PATH, share=True)
    # demo.launch(server_name="0.0.0.0", server_port=7860, root_path=ROOT_PATH)
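# Local usage (a sketch, assuming serve/constants.py defines SERVER_PORT, ROOT_PATH
# and ELO_RESULTS_DIR, and that ELO_RESULTS_DIR points at an existing directory):
#   python app.py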