# overthink-1 / app.py
import time
import gradio as gr
from os import getenv
from openai import OpenAI
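# OpenRouter exposes an OpenAI-compatible API; the key is read from the
# OPENROUTER_API_KEY environment variable.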
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)
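# Reasoning bubbles (.thought) are hidden by default and only revealed when the
# <body> element carries the "show-thoughts" class, toggled by the checkbox below.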
css = """
body.show-thoughts .thought {
    display: block !important;
}
.thought {
    opacity: 0.8;
    font-family: "Courier New", monospace;
    border: 1px gray solid;
    padding: 10px;
    border-radius: 5px;
    display: none;
}
.thought-prompt {
    opacity: 0.8;
    font-family: "Courier New", monospace;
}
"""
with open("contemplator.txt", "r") as f:
    system_msg = f.read()
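# Animated "Thinking" banner: the word slides back and forth inside a 20-dot
# bracket as a function of elapsed seconds.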
def make_thinking_prompt(elapsed):
    i = int(elapsed * 4) % 40
    if i > 20:
        i = 40 - i
    return "πŸ€” [" + "." * i + "Thinking" + "." * (20 - i) + "]"
def streaming(message, history, system_msg, model):
    messages = [
        {
            "role": "system",
            "content": system_msg
        }
    ]
    for user, assistant in history:
        messages.append({
            "role": "user",
            "content": user
        })
        messages.append({
            "role": "assistant",
            "content": assistant
        })
    messages.append({
        "role": "user",
        "content": message
    })
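    # Show a placeholder right away while waiting for the first streamed token.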
    thinking_prompt = "<p class='thought-prompt'>" + "🀨 Understanding..." + "</p>"
    yield thinking_prompt
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        max_completion_tokens=8000,
        temperature=0.0,
        stream=True,
    )
    reply = ""
    start_time = time.time()
    try:
        for chunk in completion:
            # Some providers send keep-alive chunks without choices; skip them.
            if not chunk.choices:
                continue
            # delta.content can be None (e.g. on the final chunk); treat it as empty.
            reply += chunk.choices[0].delta.content or ""
            answer = ""
            if "</inner_thoughts>" not in reply:
                # Still inside the reasoning block: show the animated indicator.
                thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").strip()}</div>'
                thinking_prompt = "<p class='thought-prompt'>" + make_thinking_prompt(time.time() - start_time) + "</p>"
            else:
                # Reasoning finished: separate the thoughts from the final answer.
                thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").split("</inner_thoughts>")[0].strip()}</div>'
                answer = reply.split("</inner_thoughts>")[1].replace("<final_answer>", "").replace("</final_answer>", "").strip()
                thinking_prompt = f"<p class='thought-prompt'>βŒ› Thought for {time.time() - start_time:.2f} seconds</p>"
            yield thinking_prompt + thought_text + "<br>" + answer
        # Re-emit the final state once the stream ends.
        yield thinking_prompt + thought_text + "<br>" + answer
    except Exception as e:
        print(e)
        yield f"An error occurred. {e}"
markdown = """
## 🫐 Overthink 1 (o1)
Inspired by how o1 works, this LLM is instructed to generate very long and detailed chains of thought. It thinks extra hard before giving an answer.
This actually does help with reasoning compared to plain step-by-step prompting. I wrote a blog post about it [here](https://huggingface.co/blog/wenbopan/recreating-o1).
Sometimes the model overthinks even the simplest questions, but it's fun to watch. Hope you enjoy it!
### System Message
This is done with a large system message, which you can inspect in the "System Message" tab.
"""
with gr.Blocks(theme=gr.themes.Soft(), css=css, fill_height=True) as demo:
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, min_width=300):
            with gr.Tab("Settings"):
                gr.Markdown(markdown)
                model = gr.Dropdown(
                    [
                        "nousresearch/hermes-3-llama-3.1-405b:free",
                        "nousresearch/hermes-3-llama-3.1-70b",
                        "meta-llama/llama-3.1-405b-instruct",
                        "google/gemini-pro-1.5-exp",
                        "meta-llama/llama-3.1-8b-instruct:free",
                    ],
                    value="nousresearch/hermes-3-llama-3.1-405b:free",
                    label="Model",
                )
                show_thoughts = gr.Checkbox(False, label="Show Thoughts", interactive=True, elem_id="show_thoughts")
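                # fn=None: client-side JS only. Toggles the "show-thoughts" class on <body>
                # so the CSS above reveals or hides the reasoning bubbles.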
                show_thoughts.change(None, js="""function run(){ checked = document.querySelector('#show_thoughts input[type="checkbox"]').checked; document.querySelector('body').classList.toggle('show-thoughts', checked); } """)
            with gr.Tab("System Message"):
                system_msg = gr.TextArea(system_msg, label="System Message")
        with gr.Column(scale=3, min_width=300):
            gr.ChatInterface(
                streaming,
                additional_inputs=[
                    system_msg,
                    model
                ],
                examples=[
                    ["How do you do? ", None, None, None],
                    ["How many R's in strawberry?", None, None, None],
                    ["Solve the puzzle of 24 points: 1 2 3 4", None, None, None],
                    ["Find x such that ⌈xβŒ‰ + x = 23/7. Express x as a common fraction.", None, None, None],
                ],
                cache_examples=False
            )

if __name__ == "__main__":
    demo.launch()