Omnibus committed
Commit 5d5363d
1 Parent(s): 9b2cb9f

Update app.py

Files changed (1)
  1. app.py +49 -39
app.py CHANGED
@@ -17,48 +17,60 @@ InferenceClient(models[2]),
     InferenceClient(models[3]),
 ]
 
+VERBOSE=True
 
 def format_prompt(message, history):
     prompt = ""
     if history:
         #<start_of_turn>userHow does the brain work?<end_of_turn><start_of_turn>model
         for user_prompt, bot_response in history:
-            prompt += f"<start_of_turn>user{user_prompt}<end_of_turn>"
-            prompt += f"<start_of_turn>model{bot_response}"
+            prompt += f"{user_prompt}\n"
+            #print(prompt)
+            prompt += f"{bot_response}\n"
+            #print(prompt)
     prompt += f"<start_of_turn>user{message}<end_of_turn><start_of_turn>model"
     return prompt
 
-
-
-def chat_inf(system_prompt,prompt,history,client_choice,seed,temp,tokens,top_p,rep_p):
+def chat_inf(system_prompt,prompt,history,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem):
     #token max=8192
+    hist_len=0
     client=clients[int(client_choice)-1]
     if not history:
         history = []
         hist_len=0
-    if history:
-        hist_len=len(history)
-        print(hist_len)
-
-    #seed = random.randint(1,1111111111111111)
-    generate_kwargs = dict(
-        temperature=temp,
-        max_new_tokens=tokens,
-        top_p=top_p,
-        repetition_penalty=rep_p,
-        do_sample=True,
-        seed=seed,
-    )
-    #formatted_prompt=prompt
-    formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
-    output = ""
-
-    for response in stream:
-        output += response.token.text
-        yield [(prompt,output)]
-    history.append((prompt,output))
-    yield history
+    if not memory:
+        memory = []
+        mem_len=0
+    if memory:
+        for ea in memory[0-chat_mem:]:
+            hist_len+=len(str(ea))
+    in_len=len(system_prompt+prompt)+hist_len
+
+    if (in_len+tokens) > 8000:
+        history.append((prompt,"Wait, that's too many tokens, please reduce the 'Chat Memory' value, or reduce the 'Max new tokens' value"))
+        yield history,memory
+    else:
+        generate_kwargs = dict(
+            temperature=temp,
+            max_new_tokens=tokens,
+            top_p=top_p,
+            repetition_penalty=rep_p,
+            do_sample=True,
+            seed=seed,
+        )
+        formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", memory[0-chat_mem:])
+        stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        output = ""
+        for response in stream:
+            output += response.token.text
+            yield [(prompt,output)],memory
+        history.append((prompt,output))
+        memory.append((prompt,output))
+        yield history,memory
+    if VERBOSE==True:
+        print("\n######### HIST "+str(in_len))
+        print("\n######### TOKENS "+str(tokens))
+        print("\n######### PROMPT "+str(len(formatted_prompt)))
 
 def get_screenshot(chat: list,height=5000,width=600,chatblock=[],theme="light",wait=3000,header=True):
     print(chatblock)
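The core of this hunk is a new input-length guard: the commit keeps only the last chat_mem memory pairs, estimates the input size by character count, and refuses to generate when the estimate plus the requested max_new_tokens would exceed 8000 (the comment notes Gemma's 8192-token context). It also stops wrapping past turns in the Gemma <start_of_turn> tags, passing history as plain "user\nbot\n" text and templating only the final message. A minimal standalone sketch of the guard follows; budget_ok and its signature are illustrative names, not from the commit, and note that len() counts characters, not model tokens, so this is only a rough proxy:

```python
# Illustrative sketch of the length guard added in chat_inf (names hypothetical).
def budget_ok(system_prompt: str, prompt: str, memory: list, chat_mem: int,
              max_new_tokens: int, limit: int = 8000) -> bool:
    # memory[-chat_mem:] is the same slice the diff writes as memory[0-chat_mem:].
    hist_len = sum(len(str(pair)) for pair in memory[-chat_mem:])
    # Character-count estimate of the full input, as in the commit; characters
    # are not tokens, so this over- or under-estimates the real usage.
    in_len = len(system_prompt + prompt) + hist_len
    return (in_len + max_new_tokens) <= limit

# Example: a small request fits the budget, an oversized one does not.
print(budget_ok("You are helpful", "Hi", [], 4, 1600))   # True
print(budget_ok("You are helpful", "Hi", [], 4, 9000))   # False
```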
@@ -70,20 +82,18 @@ def get_screenshot(chat: list,height=5000,width=600,chatblock=[],theme="light",w
     print(out)
     return out
 
-
-
 def clear_fn():
-    return None
+    return None,None,None
 rand_val=random.randint(1,1111111111111111)
+
 def check_rand(inp,val):
     if inp==True:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=random.randint(1,1111111111111111))
     else:
         return gr.Slider(label="Seed", minimum=1, maximum=1111111111111111, value=int(val))
-
-
 
 with gr.Blocks() as app:
+    memory=gr.State()
     gr.HTML("""<center><h1 style='font-size:xx-large;'>Google Gemma Models</h1><br><h3>running on Huggingface Inference Client</h3><br><h7>EXPERIMENTAL""")
     chat_b = gr.Chatbot(height=500)
     with gr.Group():
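check_rand here follows the usual Gradio pattern of returning a fresh gr.Slider(...) so the seed component is updated in place. One thing to watch, based only on what is visible in this diff: clear_fn now returns three values, but the Clear button in the last hunk is wired to four outputs ([inp, sys_inp, chat_b, memory]), and Gradio typically raises a "returned too few output values" error in that situation. A hypothetical matching handler, under that assumption:

```python
# Hypothetical fix sketch: one value per wired output (inp, sys_inp, chat_b, memory).
def clear_fn():
    return None, None, None, None
```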
@@ -99,15 +109,15 @@ with gr.Blocks() as app:
                     stop_btn=gr.Button("Stop")
                     clear_btn=gr.Button("Clear")
                 client_choice=gr.Dropdown(label="Models",type='index',choices=[c for c in models],value=models[0],interactive=True)
-
             with gr.Column(scale=1):
                 with gr.Group():
                     rand = gr.Checkbox(label="Random Seed", value=True)
                     seed=gr.Slider(label="Seed", minimum=1, maximum=1111111111111111,step=1, value=rand_val)
-                    tokens = gr.Slider(label="Max new tokens",value=6400,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
+                    tokens = gr.Slider(label="Max new tokens",value=1600,minimum=0,maximum=8000,step=64,interactive=True, visible=True,info="The maximum number of tokens")
                     temp=gr.Slider(label="Temperature",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                     top_p=gr.Slider(label="Top-P",step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                     rep_p=gr.Slider(label="Repetition Penalty",step=0.1, minimum=0.1, maximum=2.0, value=1.0)
+                    chat_mem=gr.Number(label="Chat Memory", info="Number of previous chats to retain",value=4)
     with gr.Accordion(label="Screenshot",open=False):
         with gr.Row():
             with gr.Column(scale=3):
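A caveat worth flagging (an observation about this diff, not part of the commit): unless precision=0 is set, gr.Number may deliver its value as a Python float (e.g. 4.0), and chat_inf uses chat_mem directly as a slice bound in memory[0-chat_mem:], where a float raises TypeError. A hedged sketch of a defensive cast (last_n_pairs is a hypothetical helper; setting gr.Number(precision=0, ...) would be the alternative):

```python
# Hypothetical guard: coerce the Gradio Number value before slicing.
def last_n_pairs(memory: list, chat_mem) -> list:
    n = int(chat_mem)   # gr.Number may deliver 4.0 rather than 4
    return memory[-n:] if n > 0 else []

print(last_n_pairs([("q1", "a1"), ("q2", "a2"), ("q3", "a3")], 2.0))
# [('q2', 'a2'), ('q3', 'a3')]
```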
@@ -120,10 +130,10 @@ with gr.Blocks() as app:
                 wait_time=gr.Number(label="Wait Time",value=3000)
                 theme=gr.Radio(label="Theme", choices=["light","dark"],value="light")
                 chatblock=gr.Dropdown(label="Chatblocks",info="Choose specific blocks of chat",choices=[c for c in range(1,40)],multiselect=True)
-
+
     im_go=im_btn.click(get_screenshot,[chat_b,im_height,im_width,chatblock,theme,wait_time],img)
-    chat_sub=inp.submit(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p],chat_b)
-    go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,client_choice,seed,temp,tokens,top_p,rep_p],chat_b)
+    chat_sub=inp.submit(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem],[chat_b,memory])
+    go=btn.click(check_rand,[rand,seed],seed).then(chat_inf,[sys_inp,inp,chat_b,memory,client_choice,seed,temp,tokens,top_p,rep_p,chat_mem],[chat_b,memory])
     stop_btn.click(None,None,None,cancels=[go,im_go,chat_sub])
-    clear_btn.click(clear_fn,None,[chat_b])
+    clear_btn.click(clear_fn,None,[inp,sys_inp,chat_b,memory])
    app.queue(default_concurrency_limit=10).launch()
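For readers unfamiliar with the pattern in the last hunk: memory=gr.State() holds per-session data, and because chat_inf lists [chat_b, memory] as outputs, each yield history,memory both refreshes the chatbot and writes the memory back into the State; the .then() chaining runs check_rand first (updating the seed slider) and only then starts generation. A minimal self-contained sketch of the same State round-trip, a hypothetical demo rather than code from this repo, using the tuple-style chat history this diff uses:

```python
import gradio as gr

# Minimal gr.State round-trip, mirroring how this commit threads `memory`
# through chat_inf: the State is both an input and an output of the handler.
def respond(message, history, memory):
    history = history or []
    memory = memory or []
    reply = f"echo: {message}"          # stand-in for the streamed model output
    history.append((message, reply))
    memory.append((message, reply))     # persisted back via the State output
    return history, memory

with gr.Blocks() as demo:
    memory = gr.State()                 # per-session storage, as in the diff
    chat = gr.Chatbot()
    box = gr.Textbox()
    box.submit(respond, [box, chat, memory], [chat, memory])

if __name__ == "__main__":
    demo.launch()
```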