import os
import time
from pathlib import Path

import gradio as gr
from transformers import AutoTokenizer
from fastT5 import get_onnx_runtime_sessions, OnnxT5

trained_model_path = './t5_squad_v1/'
pretrained_model_name = Path(trained_model_path).stem

encoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-encoder_quantized.onnx")
decoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-decoder_quantized.onnx")
init_decoder_path = os.path.join(
    trained_model_path, f"{pretrained_model_name}-init-decoder_quantized.onnx")

# Wrap the three quantized ONNX graphs in ONNX Runtime sessions and expose
# them through fastT5's OnnxT5, which mimics the Hugging Face generate() API.
model_paths = encoder_path, decoder_path, init_decoder_path
model_sessions = get_onnx_runtime_sessions(model_paths)
model = OnnxT5(trained_model_path, model_sessions)

tokenizer = AutoTokenizer.from_pretrained(trained_model_path)


def get_question(sentence, answer, mdl, tknizer):
    # Build the "context: ... answer: ..." prompt format the model was trained on.
    text = "context: {} answer: {}".format(sentence, answer)
    print(text)
    max_len = 256
    encoding = tknizer.encode_plus(
        text,
        max_length=max_len,
        padding=False,
        truncation=True,
        return_tensors="pt",
    )
    input_ids, attention_mask = encoding["input_ids"], encoding["attention_mask"]

    # Beam-search decoding; no_repeat_ngram_size discourages repeated phrases.
    outs = mdl.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        early_stopping=True,
        num_beams=5,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        max_length=300,
    )
    dec = [tknizer.decode(ids, skip_special_tokens=True) for ids in outs]
    # Strip the "question:" prefix the model emits and surrounding whitespace.
    question = dec[0].replace("question:", "").strip()
    return question


# context = "Ramsri loves to watch cricket during his free time"
# answer = "cricket"
context = ("Donald Trump is an American media personality and businessman who "
           "served as the 45th president of the United States.")
answer = "Donald Trump"

ques = get_question(context, answer, model, tokenizer)
print("question: ", ques)

context = gr.components.Textbox(
    lines=5, placeholder="Enter paragraph/context here...")
answer = gr.components.Textbox(
    lines=3, placeholder="Enter answer/keyword here...")
question = gr.components.Textbox(type="text", label="Question")


def generate_question(context, answer):
    start_time = time.time()  # Record the start time
    result = get_question(context, answer, model, tokenizer)
    end_time = time.time()  # Record the end time
    latency = end_time - start_time  # Calculate latency
    print(f"Latency: {latency} seconds")
    return result


iface = gr.Interface(
    fn=generate_question,
    inputs=[context, answer],
    outputs=question,
)
iface.launch(share=True)
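
# Note: the three quantized ONNX files loaded above are expected to already
# exist under ./t5_squad_v1/. If they don't, fastT5 can export and quantize a
# fine-tuned T5 checkpoint in one call; a minimal sketch (the
# custom_output_path argument is an assumption, check your fastT5 version):
#
#   from fastT5 import export_and_get_onnx_model
#   model = export_and_get_onnx_model(trained_model_path,
#                                     custom_output_path=trained_model_path)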