import streamlit as st
from datasets import load_dataset
from transformer_ranker import TransformerRanker, prepare_popular_models

st.title("Choose Your Transformer")

# 1) Select one or more models from the Hugging Face model hub
model_options = ['prajjwal1/bert-tiny', 'google/electra-small-discriminator', 'microsoft/deberta-v3-small',
                 'bert-base-uncased', 'bert-base-cased', 'distilbert-base-uncased', 'roberta-base']
selected_models = st.multiselect("Select Models", model_options, default=model_options[:1])

# 2) Select a dataset (from the text classification or token classification subcategory)
dataset_options = ['trec', 'conll2003']  # Example datasets; this list can be extended
selected_dataset = st.selectbox("Select Dataset", dataset_options)

# 3) Select the dataset downsampling ratio
downsample_values = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
downsample_ratio = st.select_slider("Dataset Downsample Ratio", options=downsample_values, value=0.2)

# 4) Select the layer-selection strategy, with 'layermean' as the default
layer_options = ['lastlayer', 'layermean', 'bestlayer']
selected_layer = st.selectbox("Layer Selection", layer_options, index=1)

# Expander that will hold log output (real-time logging to be added later)
log_expander = st.expander("Expand to view log")
log_placeholder = log_expander.empty()  # Placeholder for log updates

# Button to run the ranking process
if st.button("Run Model Ranking"):
    with st.spinner("Running the transformer-ranker..."):
        # Step 1: Load the selected dataset
        dataset = load_dataset(selected_dataset)

        # Step 2: Prepare the selected models; fall back to the 'base' preset if nothing was selected
        language_models = prepare_popular_models('base') if not selected_models else selected_models

        # Step 3: Initialize the ranker with the chosen downsampling ratio
        ranker = TransformerRanker(dataset, dataset_downsample=downsample_ratio)

        # Placeholder for log updates
        log_placeholder.text("Real-time logging will be added here...")

        # Step 4: Run the ranker, passing the chosen layer-selection strategy
        # ('layer_aggregator' is assumed to be the transformer-ranker run() option for this)
        results = ranker.run(language_models, batch_size=64, layer_aggregator=selected_layer)

        # Display the final results
        st.write(results)

    st.success("Ranking is done!")
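
# To try the app locally (assuming this script is saved as app.py):
#   streamlit run app.py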