import streamlit as st
from streamlit.logger import get_logger
from timeit import default_timer as timer
import sqlite3
import pandas as pd

# Module-level logger for this file, obtained from Streamlit's logging helper.
LOGGER = get_logger(__name__)
    
def preprocess(s:str)->str:
    """Return *s* with quote characters and generic citation words removed.

    The fragments are deleted one after another, in the order listed below,
    using plain substring replacement. A fragment is therefore removed even
    when it occurs inside a longer word, and surrounding whitespace is left
    untouched. This function is passed as the ``processor`` argument of the
    fuzzy-matching calls in this file.
    """
    # Order matters: the double quote is stripped first and the single quote
    # last, so a word that only appears after a single quote is removed
    # is kept.
    fragments = (
        '"',
        'על',
        'פרק',
        'פסוק',
        'דף',
        'עמוד',
        'סימן',
        'סעיף',
        'חידושי',
        "'",
    )
    cleaned = s
    for fragment in fragments:
        cleaned = cleaned.replace(fragment, '')
    return cleaned
        
def _fetch_table(conn, query):
    """Run *query* on *conn* and return every row as a DataFrame.

    Column names are taken from the cursor description and passed to the
    DataFrame constructor, so an empty result set still yields a frame with
    the right columns instead of failing on a column-count mismatch.
    """
    cursor = conn.execute(query)
    column_names = [column[0] for column in cursor.description]
    return pd.DataFrame(cursor.fetchall(), columns=column_names)


@st.cache_resource
def get_dfs()->"tuple[pd.DataFrame, pd.DataFrame]":
    """Load the lookup tables from ``test42.db`` and return them as DataFrames.

    Reads the ``titles``, ``texts``, ``refs`` and ``books`` tables. ``texts``
    is inner-joined to ``books`` (``texts.bid == books._id``) and the result is
    joined to ``refs`` (``_id_x == refs.tid``; ``_id_x`` is the ``_id`` column
    that came from ``texts``, suffixed by pandas because both sides of the
    first merge have an ``_id`` column).

    The function is wrapped in ``st.cache_resource``, so Streamlit is expected
    to run the body once and hand the same objects back on later calls.

    Returns:
        A ``(titles_df, texts_df)`` tuple: the raw ``titles`` table and the
        merged texts/books/refs table.
    """
    print('hello from get_dfs..')

    conn = sqlite3.connect('test42.db')
    try:
        titles_df = _fetch_table(conn, "SELECT * FROM titles")
        texts = _fetch_table(conn, "SELECT * FROM texts")
        refs = _fetch_table(conn, "SELECT * FROM refs")
        books = _fetch_table(conn, "SELECT * FROM books")
    finally:
        # Everything is materialised in memory at this point; release the
        # database handle even if one of the queries failed.
        conn.close()

    # Attach book metadata and reference strings to every text row.
    merged = pd.merge(texts, books, how='inner', left_on='bid', right_on='_id')
    texts_df = pd.merge(merged, refs, left_on='_id_x', right_on='tid')

    return titles_df, texts_df
    

def find_ref(titles_df,texts_df,input_text,top_k,num_of_results,algorithm):
    """Fuzzy-match a free-text citation against the known references.

    Args:
        titles_df: DataFrame with at least the columns ``he_titles`` (the
            strings that are matched against the input) and ``title``.
        texts_df: DataFrame with at least the columns ``title`` and
            ``ref_text_long``.
        input_text: The citation typed by the user.
        top_k: How many best-matching books to scan. ``0`` skips the book
            stage and matches the input against every distinct reference.
        num_of_results: Maximum number of results to return.
        algorithm: Name of a public scorer function in ``rapidfuzz.fuzz``,
            for example ``'token_ratio'`` or ``'WRatio'``.

    Returns:
        A list of dicts ordered by descending ``ref_score``. Each dict has
        ``ref`` and ``ref_score``; when ``top_k`` is non-zero it also has
        ``book`` and ``book_score``. An empty list is returned for empty
        input.

    Raises:
        ValueError: If ``algorithm`` does not name a public callable of
            ``rapidfuzz.fuzz``.
    """
    print('hello from find_ref..')

    if not input_text:
        return []

    # Imported lazily so the dependency is only needed once a search is run.
    from rapidfuzz import fuzz, process as rapidfuzz_process

    # Look the scorer up by name instead of eval()-ing the string: only public
    # attributes of rapidfuzz.fuzz are reachable, never arbitrary expressions.
    scorer = None
    if isinstance(algorithm, str) and algorithm.isidentifier() and not algorithm.startswith('_'):
        scorer = getattr(fuzz, algorithm, None)
    if not callable(scorer):
        raise ValueError(f"unknown rapidfuzz.fuzz scorer: {algorithm!r}")
    print(scorer)

    # Expand the ':' and '.' shorthand into the spelled-out words before matching.
    input_text = input_text.replace(':','עמוד ב').replace('.','עמוד א')

    results = []
    if top_k == 0:
        # Search the whole reference list directly; extract() already returns
        # its matches ordered by score.
        refs = texts_df['ref_text_long'].unique()
        matches = rapidfuzz_process.extract(
            input_text, refs, scorer=scorer, limit=num_of_results, processor=preprocess
        )
        results = [{'ref': ref, 'ref_score': ref_score} for ref, ref_score, _ in matches]
    else:
        # Stage 1: pick the top_k books whose titles best match the input.
        book_matches = rapidfuzz_process.extract(
            input_text, titles_df['he_titles'], scorer=scorer, limit=top_k, processor=preprocess
        )
        for book, book_score, _ in book_matches:
            # Stage 2: match the input against that book's own references.
            book_title = titles_df.loc[titles_df['he_titles'] == book, 'title'].iloc[0]
            refs = texts_df.loc[texts_df['title'] == book_title, 'ref_text_long'].unique()
            # A single book can contribute at most num_of_results entries to
            # the final list, so that is the per-book limit (a fixed limit of
            # 10 would cap the output when few books are scanned).
            ref_matches = rapidfuzz_process.extract(
                input_text, refs, scorer=scorer, limit=num_of_results, processor=preprocess
            )
            for ref, ref_score, _ in ref_matches:
                results.append(
                    {'ref': ref, 'ref_score': ref_score, 'book': book, 'book_score': book_score}
                )
        # Rank by the reference score, not by the score of the book it came from.
        results.sort(key=lambda item: item['ref_score'], reverse=True)

    return results[:num_of_results]


def run():
    """Render the Streamlit page: search box, sidebar settings and results.

    On every script run this configures the page, loads the DataFrames via
    ``get_dfs()``, reads the user's query and settings, and, when a query is
    present, shows the elapsed search time followed by each result with a
    button that displays the matching text rows.
    """
    st.set_page_config(
        page_title=" חיפוש מקורות",
        page_icon="📚",
        layout="wide",
        initial_sidebar_state="expanded"    
    )
    st.write("# חיפוש מקורות באמצעות מרחק לוינשטיין")

    # One call is enough; the previous extra call discarded its result.
    titles_df,texts_df = get_dfs()
    user_input = st.text_input('כתוב את המקור המבוקש', placeholder='בבא קמא דף ב עמוד ב') 
    top_k =  st.sidebar.slider('כמה ספרים לסרוק top_k:',0,20,10)
    num_of_results = st.sidebar.slider('מספר התוצאות שברצונך להציג:',1,25,5)
    algorithm = st.sidebar.selectbox('האלגוריתם לדירוג התוצאות',['token_ratio','ratio','WRatio','partial_ratio','token_set_ratio','partial_token_set_ratio','token_sort_ratio'])

    if not user_input:
        return

    started = timer()
    results = find_ref(titles_df,texts_df,user_input,top_k,num_of_results,algorithm)
    st.write(f"finished in {1e3*(timer()-started):.1f} ms")

    # `or []` keeps the loop safe should find_ref hand back nothing at all.
    for i, result in enumerate(results or []):
        st.write(result)
        # The loop index is the widget key, so two results with the same
        # label still get distinct buttons.
        if st.button("פתח " +result['ref'], key=i):
            matching_rows = texts_df.loc[texts_df['ref_text_long']==result['ref']]
            st.write(matching_rows[['heText','ref_text_long']])
            

# Build the page only when this file is executed as the main module,
# not when it is imported from elsewhere.
if __name__ == "__main__":
    run()