Daniel Marques committed
Commit 61d38da • 1 Parent(s): 415fe69

feat: add ministral model

Files changed:
- constants.py (+5 -5)
- prompt_template_utils.py (+4 -4)
constants.py CHANGED

@@ -32,7 +32,7 @@ CHROMA_SETTINGS = Settings(
 )

 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE =
+CONTEXT_WINDOW_SIZE = 3000
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)

 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing

@@ -98,14 +98,14 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
 #### (FOR GGUF MODELS)
 ####

-MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
-MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
+# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
+# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"

 # MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"

-# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
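For readers skimming the diff: these constants are what the app's model loader consumes. Below is a minimal sketch of that path, assuming a llama-cpp-python/LangChain setup as in localGPT-style Spaces; the function name load_gguf_llm and the n_batch value are assumptions, not part of this commit.

```python
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp

from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS, MODEL_ID, MODEL_BASENAME


def load_gguf_llm():
    """Download the GGUF weights and wrap them in a LangChain LlamaCpp LLM."""
    # Fetch (or reuse from the local Hugging Face cache) the quantized model file.
    model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_BASENAME)
    return LlamaCpp(
        model_path=model_path,
        n_ctx=CONTEXT_WINDOW_SIZE,  # context window; halve it if llama.cpp reports buffer errors
        max_tokens=MAX_NEW_TOKENS,
        n_batch=512,  # assumption: a common default; reduce on low-memory machines
    )
```

With the diff above applied, this loader would serve Mistral-7B-Instruct-v0.1 (Q8_0) instead of Llama-2-13B-Chat.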
prompt_template_utils.py CHANGED

@@ -8,9 +8,9 @@ from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
 from langchain.memory.chat_message_histories import RedisChatMessageHistory

-message_history = RedisChatMessageHistory(
-    url="redis://localhost:6379/1", ttl=600, session_id="my-session"
-)
+# message_history = RedisChatMessageHistory(
+#     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
+# )

 # this is specific to Llama-2.

@@ -89,7 +89,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
     )
     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

-    memory = ConversationBufferMemory(input_key="question", memory_key="history", chat_memory=message_history)
+    memory = ConversationBufferMemory(input_key="question", memory_key="history")

     return (
         prompt,
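The (prompt, memory) pair returned by get_prompt_template is what the retrieval code downstream consumes. Here is a minimal sketch of that wiring, assuming a LangChain RetrievalQA chain as in localGPT-style projects; build_qa_chain and the promptTemplate_type="mistral" argument are assumptions, not part of this commit.

```python
from langchain.chains import RetrievalQA

from prompt_template_utils import get_prompt_template


def build_qa_chain(llm, retriever):
    """Wire the prompt and conversation memory into a RetrievalQA chain."""
    # "mistral" is assumed here for the newly enabled model; the module's
    # comments indicate a Llama-2-specific template is selected with "llama".
    prompt, memory = get_prompt_template(promptTemplate_type="mistral", history=True)
    return RetrievalQA.from_chain_type(
        llm=llm,              # e.g. the LlamaCpp instance from the earlier sketch
        chain_type="stuff",   # stuff retrieved documents straight into the prompt
        retriever=retriever,  # e.g. a Chroma vector store's .as_retriever()
        chain_type_kwargs={"prompt": prompt, "memory": memory},
    )
```

Note that once the Redis-backed message_history is commented out, ConversationBufferMemory keeps chat history in process memory only, which is why the chat_memory argument is dropped in the same commit.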