Daniel Marques committed
Commit 61d38da • 1 Parent(s): 415fe69

feat: add ministral model

Files changed:
- constants.py (+5 -5)
- prompt_template_utils.py (+4 -4)
constants.py CHANGED

@@ -32,7 +32,7 @@ CHROMA_SETTINGS = Settings(
 )

 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE =
+CONTEXT_WINDOW_SIZE = 3000
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)

 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing

@@ -98,14 +98,14 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
 #### (FOR GGUF MODELS)
 ####

-MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
-MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
+# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
+# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"

 # MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"

-# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"

 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
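For readers skimming the diff: these constants are what the app's model loader consumes. Below is a minimal sketch of that path, assuming a llama-cpp-python/LangChain setup as in localGPT-style Spaces; the function name load_gguf_llm and the n_batch value are assumptions, not part of this commit.

```python
from huggingface_hub import hf_hub_download
from langchain.llms import LlamaCpp

from constants import CONTEXT_WINDOW_SIZE, MAX_NEW_TOKENS, MODEL_ID, MODEL_BASENAME


def load_gguf_llm():
    """Download the GGUF weights and wrap them in a LangChain LlamaCpp LLM."""
    # Fetch (or reuse from the local Hugging Face cache) the quantized model file.
    model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_BASENAME)
    return LlamaCpp(
        model_path=model_path,
        n_ctx=CONTEXT_WINDOW_SIZE,  # context window; halve it if llama.cpp reports buffer errors
        max_tokens=MAX_NEW_TOKENS,
        n_batch=512,  # assumption: a common default; reduce on low-memory machines
    )
```

With the diff above applied, this loader would serve Mistral-7B-Instruct-v0.1 (Q8_0) instead of Llama-2-13B-Chat.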
prompt_template_utils.py CHANGED

@@ -8,9 +8,9 @@ from langchain.memory import ConversationBufferMemory
 from langchain.prompts import PromptTemplate
 from langchain.memory.chat_message_histories import RedisChatMessageHistory

-message_history = RedisChatMessageHistory(
-    url="redis://localhost:6379/1", ttl=600, session_id="my-session"
-)
+# message_history = RedisChatMessageHistory(
+#     url="redis://localhost:6379/1", ttl=600, session_id="my-session"
+# )

 # this is specific to Llama-2.

@@ -89,7 +89,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
     )
     prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)

-    memory = ConversationBufferMemory(input_key="question", memory_key="history", chat_memory=message_history)
+    memory = ConversationBufferMemory(input_key="question", memory_key="history")

     return (
         prompt,
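The (prompt, memory) pair returned by get_prompt_template is what the retrieval code downstream consumes. Here is a minimal sketch of that wiring, assuming a LangChain RetrievalQA chain as in localGPT-style projects; build_qa_chain and the promptTemplate_type="mistral" argument are assumptions, not part of this commit.

```python
from langchain.chains import RetrievalQA

from prompt_template_utils import get_prompt_template


def build_qa_chain(llm, retriever):
    """Wire the prompt and conversation memory into a RetrievalQA chain."""
    # "mistral" is assumed here for the newly enabled model; the module's
    # comments indicate a Llama-2-specific template is selected with "llama".
    prompt, memory = get_prompt_template(promptTemplate_type="mistral", history=True)
    return RetrievalQA.from_chain_type(
        llm=llm,              # e.g. the LlamaCpp instance from the earlier sketch
        chain_type="stuff",   # stuff retrieved documents straight into the prompt
        retriever=retriever,  # e.g. a Chroma vector store's .as_retriever()
        chain_type_kwargs={"prompt": prompt, "memory": memory},
    )
```

Note that once the Redis-backed message_history is commented out, ConversationBufferMemory keeps chat history in process memory only, which is why the chat_memory argument is dropped in the same commit.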