import os
import threading
import time
import subprocess

# from transformers import pipeline  # only needed for the unused predict_t() fallback below
import ollama
import gradio

# Download the ollama binary into the home directory if it is not already present.
OLLAMA = os.path.expanduser("~/ollama")
if not os.path.exists(OLLAMA):
    subprocess.run("curl -L https://ollama.com/download/ollama-linux-amd64 -o ~/ollama", shell=True)
    os.chmod(OLLAMA, 0o755)

# Chat history as (user, assistant) pairs, shared between the handlers and the Chatbot widget.
history = []


def ollama_service_thread():
    # Run the ollama server in the background for the lifetime of the process.
    subprocess.run("~/ollama serve", shell=True)


OLLAMA_SERVICE_THREAD = threading.Thread(target=ollama_service_thread)
OLLAMA_SERVICE_THREAD.start()

print("Giving ollama serve a moment")
time.sleep(10)

subprocess.run("~/ollama pull tinydolphin:latest", shell=True)


def get_history_messages():
    # Flatten the (user, assistant) history into the role/content message format ollama expects.
    messages = []
    for user, assist in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assist})
    return messages


def predict(prompt):
    response = ollama.chat(
        model="tinydolphin",
        messages=[
            *get_history_messages(),
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    history.append((prompt, ""))
    message = ""
    # Stream tokens back to the UI, updating the last history entry as chunks arrive.
    for chunk in response:
        message += chunk["message"]["content"]
        history[-1] = (prompt, message)
        yield "", history


def predict_t(prompt):
    # Unused alternative that runs the model via transformers instead of ollama;
    # requires uncommenting the `from transformers import pipeline` import above.
    print("Predict:", prompt)
    print("Loading model")
    pipe = pipeline("conversational", model="cognitivecomputations/TinyDolphin-2.8-1.1b")
    print("Running pipeline")
    response = pipe(
        [
            *get_history_messages(),
            {"role": "user", "content": prompt},
        ],
    )
    history.append((prompt, response.messages[-1]["content"]))
    print("Predict done")
    return "", history


with gradio.Blocks(fill_height=True) as demo:
    chat = gradio.Chatbot(scale=1)
    with gradio.Row(variant="compact"):
        prompt = gradio.Textbox(show_label=False, scale=6, autofocus=True)
        button = gradio.Button(scale=1)
    # Submit the prompt either by clicking the button or pressing Enter in the textbox.
    for handler in [button.click, prompt.submit]:
        handler(predict, inputs=[prompt], outputs=[prompt, chat])

if __name__ == "__main__":
    demo.launch()