|
import pandas as pd |
|
from deta import Deta |
|
import streamlit as st |
|
|
|
import streamlit as st |
|
|
|
# st.set_page_config must be the FIRST Streamlit command executed in the
# script; any other st.* call before it raises StreamlitAPIException.
st.set_page_config(page_title="🤗 Persian LLM Leaderboard", page_icon=":bar_chart:", layout="wide")

# Debug aid: echo this snippet on the page and show the running Streamlit
# version (useful when diagnosing deployment/version mismatches).
with st.echo():

    st.write(f"streamlit version: {st.__version__}")
|
|
|
# Inject the app's custom stylesheet inline — Streamlit has no native
# hook for loading an external CSS file.
with open('.streamlit/style.css') as css_file:
    custom_css = css_file.read()

st.markdown('<style>' + custom_css + '</style>', unsafe_allow_html=True)

# Page heading (HTML so the emoji + h1 styling from style.css apply).
st.markdown("<h1>🤗 Open Persian LLM Leaderboard</h1>", unsafe_allow_html=True)
|
|
|
|
|
# The Deta project key is read from Streamlit's secrets store
# (.streamlit/secrets.toml locally, or the deployment's secret manager).
DETA_KEY = st.secrets["DETA_KEY"]

deta = Deta(DETA_KEY)

# NoSQL collection ("Base") holding every model submitted through the
# "Submit A Model" tab; records are keyed by the model's short name.
database = deta.Base("submitted-models")
|
|
|
def insert_model(data):
    """Persist one submission record (a dict) to the Deta Base.

    Returns whatever the Base stores — the put() result from Deta.
    """
    return database.put(data)
|
def fetch_all_models():
    """Return every submitted-model record as a list of dicts."""
    query_result = database.fetch()
    return query_result.items
|
def get_model_name(model):
    """Look up a submission record by its key (the model's short name).

    Returns the stored dict, or None when no record exists under that key.
    """
    return database.get(model)
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True)


def _tab_label(text):
    """Pad a tab caption with EM QUAD spaces (U+2001) so the tabs render wider."""
    return "\u2001\u2001\u2001 " + text + "\u2001\u2001\u2001"


tab1, tab2, tab3 = st.tabs([_tab_label("LLM Benchmark"),
                            _tab_label("Submit A Model"),
                            _tab_label("About Leaderboard")])
|
|
|
|
|
|
|
# Metric options offered per evaluation task; list order is the order the
# multiselect widgets display them in.
summ_eval_metrics = ["BLEU", "CHARF", "TER"]      # text summarization
qas_eval_metrics = ["F1", "EXACT-MATCH"]          # question answering
mts_eval_metrics = ["CHARF", "BLEU", "TER"]       # machine translation
mcq_eval_metrics = ["MC1", "MC2"]                 # multiple-choice questions
|
|
|
|
|
|
|
with tab1:
    c, col1, cc, col2 = st.columns([.55, 2, .3, 2], gap="small")

    with col1:
        eval_tasks = st.radio("Select An Evaluation Task:",
                              ('Text Summarization', 'Question Answering',
                               'Machine Translation', 'Multiple Choice QNs'),
                              horizontal=True)

    with col2:
        model_type = st.radio("Select A Model Type:",
                              ('All', 'Quantized', 'Pretrained',
                               'Fine\u2013tuned', 'Instruction\u2014tuned'),
                              horizontal=True)

    # Placeholder rows shared by every task table (note: the type strings use
    # an en dash in 'Fine–tuned' and an em dash in 'Instruction—tuned', and
    # must match the radio options above exactly for filtering to work).
    _TYPES = ['Quantized', 'Pretrained', 'Fine\u2013tuned', 'Instruction\u2014tuned']
    _MODELS = ['username/model1', 'username/model2', 'username/model3', 'username/model4']

    def _render_leaderboard(metrics, default_metrics, scores, editor_key):
        """Render the metric picker and type-filtered leaderboard for one task.

        metrics: metric column names offered in the multiselect.
        default_metrics: metrics selected initially (order preserved).
        scores: mapping metric name -> list of 4 demo scores (one per _MODELS row).
        editor_key: unique Streamlit widget key for this task's data editor.
        """
        selected_metrics = st.multiselect('Select Multiple Evaluation Metrics:',
                                          metrics, default_metrics)

        st.markdown("<br>", unsafe_allow_html=True)

        board = pd.DataFrame({'Type': _TYPES, 'Model': _MODELS, **scores})

        # 'All' leaves the table unfiltered; any concrete type keeps only its rows.
        if model_type in _TYPES:
            board = board.loc[board['Type'] == model_type]

        board = board[['Model', 'Type'] + selected_metrics]

        # Turn repo names into full URLs so LinkColumn renders them clickable.
        board['Model'] = board['Model'].apply(lambda x: f'https://huggingface.co/{x}')

        # Only one task branch runs per script execution, so reusing this
        # checkbox key across tasks is safe.
        st.checkbox("Use container width ▶️", value=True, key="use_container_width")

        st.data_editor(board, column_config={"Model": st.column_config.LinkColumn("Model")},
                       hide_index=True, use_container_width=st.session_state.use_container_width,
                       key=editor_key)

    if eval_tasks == 'Text Summarization':
        _render_leaderboard(summ_eval_metrics, ['BLEU', 'CHARF', 'TER'],
                            {'BLEU': [70, 60, 50, 40],
                             'CHARF': [40, 50, 60, 70],
                             'TER': [50, 70, 40, 60]},
                            "data_editor")

    elif eval_tasks == 'Question Answering':
        _render_leaderboard(qas_eval_metrics, ['F1', 'EXACT-MATCH'],
                            {'F1': [70, 60, 50, 40],
                             'EXACT-MATCH': [40, 50, 60, 70]},
                            "data_editor1")

    elif eval_tasks == 'Machine Translation':
        _render_leaderboard(mts_eval_metrics, ['BLEU', 'CHARF', 'TER'],
                            {'BLEU': [70, 60, 50, 40],
                             'CHARF': [40, 50, 60, 70],
                             'TER': [50, 70, 40, 60]},
                            "data_editor2")

    elif eval_tasks == 'Multiple Choice QNs':
        _render_leaderboard(mcq_eval_metrics, ['MC1', 'MC2'],
                            {'MC1': [70, 60, 50, 40],
                             'MC2': [40, 50, 60, 70]},
                            "data_editor3")
|
|
|
|
|
|
|
with tab2:

    # Session-local frame that accumulates rows submitted during this run.
    submitted_models = pd.DataFrame(
        columns=['Model Name', 'Model HF Name', 'Model Type', 'Model Precision', 'Evaluation Tasks'])

    # Two text inputs: the short name (required) and the HF repo name.
    c, col1, col2, cc = st.columns([0.2, 1, 3, 0.2], gap="small")

    with col1:
        model_name = st.text_input("Enter Model Name (required):",
                                   placeholder="Enter model's short name", key="model_name")

    with col2:
        model_link = st.text_input("Enter Model HuggingFace Name:",
                                   placeholder="Enter model's HF Name: username/model", key="model_link")

    # Three dropdowns: type, precision, and which task(s) to evaluate on.
    c, col1, col2, col3, cc = st.columns([0.2, 1, 1, 2, 0.2], gap="small")

    with col1:
        type_options = ['Quantized', 'Pretrained', 'Fine\u2013tuned', 'Instruction\u2014tuned']
        selected_model_type = st.selectbox('Select Model Type:', type_options)

    with col2:
        precision_options = ['float32', 'float16', 'bfloat16', '8bit (LLM.int8)', '4bit (QLoRA/FP4)']
        selected_model_precision = st.selectbox('Select Model Precision:', precision_options)

    with col3:
        task_options = ['All Tasks', 'Text Summarization', 'Question Answering',
                        'Machine Translation', 'Multiple Choice QNs']
        selected_eval_tasks = st.selectbox('Select An Evaluation Task:', task_options)
|
|
|
|
|
st.markdown("##") |
|
|
|
|
|
c, col1 , col2, cc = st.columns([2, 1, 1, 2], gap="small") |
|
|
|
with col1: |
|
def clear_text(): |
|
st.session_state["model_name"] = "" |
|
st.session_state["model_link"] = "" |
|
|
|
submit_button = st.button('Submit Model', key="submit") |
|
|
|
if submit_button==True and model_name!='' and model_link!='': |
|
response = get_model_name(model_name) |
|
if response==None: |
|
model_name_exist=False |
|
input_data = {'key': model_name, 'Model Name': model_name, 'Model HF Name': model_link, 'Model Type': selected_model_type, |
|
'Model Precision': selected_model_precision, 'Evaluation Tasks': selected_eval_tasks} |
|
insert_model(input_data) |
|
submitted_models = submitted_models.append(pd.DataFrame(input_data, index=[0]), ignore_index=True) |
|
submitted_models = submitted_models[['Model Name','Model HF Name', 'Model Type','Model Precision','Evaluation Tasks']] |
|
else: model_name_exist=True |
|
|
|
elif submit_button==True and model_name!='' and model_link=='': |
|
response = get_model_name(model_name) |
|
if response==None: |
|
model_name_exist=False |
|
input_data = {'key': model_name, 'Model Name': model_name, 'Model HF Name': None, 'Model Type': selected_model_type, |
|
'Model Precision': selected_model_precision, 'Evaluation Tasks': selected_eval_tasks} |
|
insert_model(input_data) |
|
submitted_models = submitted_models.append(pd.DataFrame(input_data, index=[0]), ignore_index=True) |
|
submitted_models = submitted_models[['Model Name','Model HF Name', 'Model Type','Model Precision','Evaluation Tasks']] |
|
else: model_name_exist=True |
|
|
|
else: pass |
|
|
|
|
|
with col2: |
|
st.button('Clear Form', on_click=clear_text) |
|
|
|
st.markdown("##") |
|
|
|
c, col1 , col2 = st.columns([0.15, 3, 0.15], gap="small") |
|
|
|
with col1: |
|
if submit_button==True and model_name!='' and model_link!='' and model_name_exist==False: |
|
st.success("You have submitted your model successfully", icon="") |
|
st.data_editor(submitted_models, hide_index=True, use_container_width=st.session_state.use_container_width) |
|
|
|
elif submit_button==True and model_name!='' and model_link=='' and model_name_exist==False: |
|
st.warning("You have submitted your model, but the model's HuggingFace name is missing", icon="⚠️") |
|
st.data_editor(submitted_models, hide_index=True, use_container_width=st.session_state.use_container_width) |
|
|
|
elif submit_button==True and model_name=='' and model_link!='': |
|
st.error("You have not submitted the required information", icon="") |
|
|
|
elif submit_button==True and model_name=='' and model_link=='': |
|
st.error("You have not submitted the required information", icon="") |
|
|
|
elif submit_button==True and model_name!='' and model_link!='' and model_name_exist==True: |
|
st.error("The model already submitted. Contact admin for help: { info@wishwork.org }", icon="") |
|
|
|
elif submit_button==True and model_name!='' and model_link=='' and model_name_exist==True: |
|
st.error("The model already submitted. Contact admin for help: { info@wishwork.org }", icon="") |
|
|
|
else: pass |
|
|
|
st.markdown("##") |
|
|
|
c, col1 , col2 = st.columns([0.15, 3, 0.15], gap="small") |
|
|
|
with col1: |
|
with st.expander("Recently Submitted Models for Evaluation ⬇️"): |
|
try: |
|
all_submitted_models = pd.DataFrame(data=fetch_all_models()) |
|
all_submitted_models = all_submitted_models[['Model Name','Model HF Name', 'Model Type','Model Precision','Evaluation Tasks']] |
|
st.data_editor(all_submitted_models, hide_index=True, use_container_width=st.session_state.use_container_width, key="data_editor4") |
|
except KeyError: |
|
st.info('There are no submitted models for evaluation at this moment 😆', icon="ℹ️") |
|
|
|
|
|
|
|
# Footer pinned via the .footer CSS class (defined in .streamlit/style.css).
# The anchor previously contained a malformed fragment
# (`<a text-align: center;' href=...>`) — a bare CSS declaration with an
# orphan quote parsed by browsers as junk attributes; it belongs in a
# proper style attribute.
footer = """<div class="footer"> <p class="p1">Copyright © 2023 <a style="text-align: center;" href="https://www.wishwork.org" target="_blank">Wish Work Inc.</a></p> </div>"""

st.markdown(footer, unsafe_allow_html=True)