import os import streamlit as st import streamlit.components.v1 as components from datasets import load_dataset st.set_page_config(page_title="Gaia Search", layout="wide") os.makedirs(os.path.join(os.getcwd(), ".streamlit"), exist_ok=True) with open(os.path.join(os.getcwd(), ".streamlit/config.toml"), "w") as file: file.write('[theme]\nbase="light"') st.sidebar.markdown( """
Gaia Search 🌖🌏
A search engine for the LAION large scale image caption corpora
""", unsafe_allow_html=True, ) st.sidebar.markdown( """ """, unsafe_allow_html=True, ) query = st.sidebar.text_input(label="Search query", value="") footer = """ """ st.sidebar.markdown(footer, unsafe_allow_html=True) searcher = LuceneSearcher("index") ds = load_dataset("imdb", split="train") def search(query): hits = searcher.search(query, k=10) results = ds.select([int(hit.docid) for hit in hits]) return results + "