mikeee committed
Commit c4c59a2
1 Parent(s): e4f4dd2

Update app.py

Files changed (1):
app.py +7 -3
app.py CHANGED
@@ -251,8 +251,12 @@ repo_id = "TheBloke/mpt-30B-chat-GGML"
 _ = """
 mpt-30b-chat.ggmlv0.q4_0.bin q4_0 4 16.85 GB 19.35 GB 4-bit.
 mpt-30b-chat.ggmlv0.q4_1.bin q4_1 4 18.73 GB 21.23 GB 4-bit. Higher accuracy than q4_0 but not as high as q5_0; however, it has quicker inference than the q5 models.
+mpt-30b-chat.ggmlv0.q5_0.bin q5_0 5 20.60 GB 23.10 GB
+mpt-30b-chat.ggmlv0.q5_1.bin q5_1 5 22.47 GB 24.97 GB
+mpt-30b-chat.ggmlv0.q8_0.bin q8_0 8 31.83 GB 34.33 GB
 """
 model_filename = "mpt-30b-chat.ggmlv0.q4_1.bin"
+model_filename = "mpt-30b-chat.ggmlv0.q5_1.bin"
 destination_folder = "models"
 
 download_mpt_quant(destination_folder, repo_id, model_filename)
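The quant table and the reassigned `model_filename` drive the download step. `download_mpt_quant` is defined earlier in app.py and is not shown in this diff; below is a minimal sketch of what such a helper could look like, assuming it wraps `huggingface_hub.hf_hub_download` (the implementation here is an assumption, not the commit's actual code).

```python
# Hypothetical sketch of a helper like download_mpt_quant; the real
# implementation lives elsewhere in app.py and may differ.
from pathlib import Path

from huggingface_hub import hf_hub_download


def download_mpt_quant(destination_folder: str, repo_id: str, model_filename: str) -> str:
    """Fetch one quantized GGML file from the Hub into destination_folder."""
    Path(destination_folder).mkdir(parents=True, exist_ok=True)
    # hf_hub_download resolves the file from the repo and returns its
    # local path; local_dir drops a copy under destination_folder.
    return hf_hub_download(
        repo_id=repo_id,              # "TheBloke/mpt-30B-chat-GGML"
        filename=model_filename,      # e.g. "mpt-30b-chat.ggmlv0.q5_1.bin"
        local_dir=destination_folder,
    )
```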
@@ -261,7 +265,7 @@ logger.info("done dl")
 
 config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
 llm = AutoModelForCausalLM.from_pretrained(
-    os.path.abspath("models/mpt-30b-chat.ggmlv0.q4_1.bin"),
+    os.path.abspath(f"models/{model_filename}"),
     model_type="mpt",
     config=config,
 )
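Once loaded, the ctransformers `llm` object is callable for generation. A minimal usage sketch: the load mirrors the diff above, while the prompt format and sampling values are illustrative assumptions, not taken from the commit.

```python
# Illustrative generation with a ctransformers GGML model; prompt and
# sampling parameters here are assumptions for the sketch.
import os

from ctransformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("mosaicml/mpt-30b-chat", context_length=8192)
llm = AutoModelForCausalLM.from_pretrained(
    os.path.abspath("models/mpt-30b-chat.ggmlv0.q5_1.bin"),
    model_type="mpt",  # selects the MPT GGML architecture
    config=config,
)

prompt = "A chat between a user and an assistant.\nUSER: Hello!\nASSISTANT:"

# stream=True yields tokens one by one instead of one final string,
# which is what a chat UI typically wants.
for token in llm(prompt, max_new_tokens=128, temperature=0.2, stream=True):
    print(token, end="", flush=True)
```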
@@ -299,7 +303,7 @@ css = """
 """
 
 with gr.Blocks(
-    title="mpt-30b-chat-ggml",
+    title="mpt-30b-chat-ggml-5bit-1",
     theme=gr.themes.Soft(text_size="sm"),
     css=css,
 ) as block:
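The retitled Blocks context wraps the chat UI. A stripped-down sketch of that structure, assuming a plain Chatbot/Textbox wiring; the echo handler below is a stand-in for the Space's real streaming callback, and the CSS is a placeholder.

```python
# Stripped-down Gradio skeleton around the renamed Blocks title; the
# Space's actual layout and callbacks are richer than this sketch.
import gradio as gr

css = ".gradio-container {max-width: 850px; margin: auto;}"  # placeholder CSS

def respond(message, history):
    # The real handler streams tokens from the ctransformers llm;
    # this stub echoes so the sketch runs standalone.
    history = history + [(message, f"(reply to: {message})")]
    return "", history

with gr.Blocks(
    title="mpt-30b-chat-ggml-5bit-1",
    theme=gr.themes.Soft(text_size="sm"),
    css=css,
) as block:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    # Submit clears the textbox and appends the new turn to the chat.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])

block.queue().launch()
```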
@@ -308,7 +312,7 @@ with gr.Blocks(
     """<center><a href="https://huggingface.co/spaces/mikeee/mpt-30b-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate"></a> and spin a CPU UPGRADE to avoid the queue</center>"""
     )
     gr.Markdown(
-    """<h4><center>mpt-30b-chat-ggml</center></h4>
+    """<h4><center>mpt-30b-chat-ggml-5bit-1</center></h4>
 
     This demo is of [TheBloke/mpt-30B-chat-GGML](https://huggingface.co/TheBloke/mpt-30B-chat-GGML).