wenbopan committed
Commit
30f2a35
1 Parent(s): dbb93f6

Update app.py

Files changed (1)
  1. app.py +96 -46
app.py CHANGED
@@ -1,63 +1,113 @@
 
  import gradio as gr
- from huggingface_hub import InferenceClient

  """
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
- """
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]

-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})

-     messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
          stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         token = message.choices[0].delta.content

-         response += token
-         yield response

  """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )


  if __name__ == "__main__":
      demo.launch()
 
+ import time
  import gradio as gr
+ from os import getenv
+ from openai import OpenAI

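+ # OpenRouter exposes an OpenAI-compatible API, so the stock openai client
+ # works against it with only a base_url override and an OpenRouter key.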
+ client = OpenAI(
+     base_url="https://openrouter.ai/api/v1",
+     api_key=getenv("OPENROUTER_API_KEY"),
+ )
+ 
+ css = """
+ .thought {
+     opacity: 0.8;
+     font-family: "Courier New", monospace;
+     border: 1px gray solid;
+     padding: 10px;
+     border-radius: 5px;
+ }
  """

+ js = """

+ """

+ with open("contemplator.txt", "r") as f:
+     system_msg = f.read()

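+ # Stream a reply and progressively split it into the <inner_thoughts> block
+ # (rendered in the styled .thought div) and the <final_answer> that follows.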
+ def streaming(message, history, system_msg, model):
+     messages = [
+         {
+             "role": "system",
+             "content": system_msg
+         }
+     ]
+     for user, assistant in history:
+         messages.append({
+             "role": "user",
+             "content": user
+         })
+         messages.append({
+             "role": "assistant",
+             "content": assistant
+         })

+     messages.append({
+         "role": "user",
+         "content": message
+     })

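+     # Ask for a streamed completion; the generous token budget leaves room
+     # for the very long chain of thought the system message elicits.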
+     completion = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         max_completion_tokens=100000,
          stream=True,
+     )
+ 
+     reply = ""
+     thought_text = ""
+     answer = ""
+ 
+     start_time = time.time()
+     for i, chunk in enumerate(completion):
+         # the last streamed chunk can carry content=None, so guard the append
+         reply += chunk.choices[0].delta.content or ""
+         answer = ""
+         if "</inner_thoughts>" not in reply:
+             thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").strip()}</div>'
+         else:
+             thought_text = f'<div class="thought">{reply.replace("<inner_thoughts>", "").split("</inner_thoughts>")[0].strip()}</div>'
+             answer = reply.split("</inner_thoughts>")[1].replace("<final_answer>", "").replace("</final_answer>", "").strip()
+         thinking_prompt = "<p>" + "Thinking" + "." * (i % 5 + 1) + "</p>"
+         yield thinking_prompt + thought_text + "<br>" + answer

+     thinking_prompt = f"<p>Thought for {time.time() - start_time:.2f} seconds</p>"
+     yield thinking_prompt + thought_text + "<br>" + answer
+ 
+ markdown = """
+ ## 🫐 Overthink 1(o1)

+ Inspired by how o1 works, this LLM is instructed to generate very long and detailed chains of thought. It will think extra hard before providing an answer.
+ 
+ This actually does help with reasoning, compared to plain step-by-step prompting. I wrote a blog post about it [here](https://huggingface.co/blog/wenbopan/recreating-o1).
+ 
+ Sometimes this LLM overthinks even super simple questions, but it's fun to watch. Hope you enjoy it!
+ 
+ ### System Message
+ 
+ This is done by instructing the model with a large system message, which you can check in the "System Message" tab.
  """

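+ # Layout: settings and the editable system message in tabs on the left,
+ # the chat interface on the right.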
+ with gr.Blocks(theme=gr.themes.Soft(), css=css, fill_height=True) as demo:
+     with gr.Row(equal_height=True):
+         with gr.Column(scale=1, min_width=300):
+             with gr.Tab("Settings"):
+                 gr.Markdown(markdown)
+                 model = gr.Dropdown(["nousresearch/hermes-3-llama-3.1-405b:free", "nousresearch/hermes-3-llama-3.1-70b", "meta-llama/llama-3.1-405b-instruct"], value="nousresearch/hermes-3-llama-3.1-405b:free", label="Model")
+                 show_thoughts = gr.Checkbox(True, label="Show Thoughts", interactive=True)
+             with gr.Tab("System Message"):
+                 system_msg = gr.TextArea(system_msg, label="System Message")
+         with gr.Column(scale=3, min_width=300):
+             gr.ChatInterface(
+                 streaming,
+                 additional_inputs=[
+                     system_msg,
+                     model
+                 ],
+                 examples=[
+                     ["How do you do? ", None, None, None],
+                     ["How many R's in strawberry?", None, None, None],
+                     ["Solve the puzzle of 24 points: 2 4 9 1", None, None, None],
+                     ["Find x such that ⌈x⌉ + x = 23/7. Express x as a common fraction.", None, None, None],
+                 ],
+             )

  if __name__ == "__main__":
      demo.launch()
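
Note: streaming() assumes the model's reply follows the tag protocol that contemplator.txt enforces. The same parsing, pulled out into a standalone helper so it can be unit-tested without an API key (the helper name and sample text below are illustrative, not part of this commit):

def split_reply(reply: str) -> tuple[str, str]:
    # Mirrors the parsing in streaming(): everything before </inner_thoughts>
    # is thought text; everything after it is the final answer.
    reply = reply.replace("<inner_thoughts>", "")
    if "</inner_thoughts>" not in reply:
        return reply.strip(), ""  # still thinking, no answer yet
    thoughts, rest = reply.split("</inner_thoughts>", 1)
    answer = rest.replace("<final_answer>", "").replace("</final_answer>", "").strip()
    return thoughts.strip(), answer

# split_reply("<inner_thoughts>s-t-r-a-w-b-e-r-r-y: 3 R's</inner_thoughts><final_answer>3</final_answer>")
# returns ("s-t-r-a-w-b-e-r-r-y: 3 R's", "3")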
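
Running the Space locally also requires OPENROUTER_API_KEY in the environment. A quick sanity check of the key and model id before launching the UI (a sketch under those assumptions, not part of the commit; the model id is copied from the dropdown above):

from os import getenv
from openai import OpenAI

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=getenv("OPENROUTER_API_KEY"),
)
resp = client.chat.completions.create(
    model="nousresearch/hermes-3-llama-3.1-405b:free",
    messages=[{"role": "user", "content": "ping"}],
    max_tokens=8,  # tiny request: we only want to confirm auth and routing
)
print(resp.choices[0].message.content)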