cheesyFishes
committed on
Commit
•
1d0af05
1
Parent(s):
8c9ef31
update to llama-index v0.6.13
Browse files
- app.py +26 -13
- constants.py +4 -4
- index.json +0 -0
- initial_index/docstore.json +0 -0
- initial_index/index_store.json +1 -0
- initial_index/vector_store.json +0 -0
- requirements.txt +15 -5
app.py
CHANGED
@@ -4,14 +4,17 @@ import streamlit as st
|
|
4 |
from PIL import Image
|
5 |
from llama_index import (
|
6 |
Document,
|
7 |
-
|
8 |
GPTListIndex,
|
9 |
LLMPredictor,
|
10 |
ServiceContext,
|
11 |
SimpleDirectoryReader,
|
12 |
PromptHelper,
|
|
|
|
|
|
|
13 |
)
|
14 |
-
from llama_index.readers.file.base import
|
15 |
|
16 |
from constants import DEFAULT_TERM_STR, DEFAULT_TERMS, REFINE_TEMPLATE, TEXT_QA_TEMPLATE
|
17 |
from utils import get_llm
|
@@ -23,13 +26,14 @@ if "all_terms" not in st.session_state:
|
|
23 |
|
24 |
@st.cache_resource
|
25 |
def get_file_extractor():
|
26 |
-
|
27 |
-
|
|
|
28 |
file_extractor.update(
|
29 |
{
|
30 |
-
".jpg":
|
31 |
-
".png":
|
32 |
-
".jpeg":
|
33 |
}
|
34 |
)
|
35 |
|
@@ -52,7 +56,9 @@ def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_
|
|
52 |
|
53 |
temp_index = GPTListIndex.from_documents(documents, service_context=service_context)
|
54 |
terms_definitions = str(
|
55 |
-
temp_index.
|
|
|
|
|
56 |
)
|
57 |
terms_definitions = [
|
58 |
x
|
@@ -83,8 +89,9 @@ def initialize_index(llm_name, model_temperature, api_key):
|
|
83 |
|
84 |
service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm))
|
85 |
|
86 |
-
index =
|
87 |
-
"./
|
|
|
88 |
)
|
89 |
|
90 |
return index
|
@@ -202,8 +209,14 @@ with query_tab:
|
|
202 |
query_text = st.text_input("Ask about a term or definition:")
|
203 |
if query_text:
|
204 |
with st.spinner("Generating answer..."):
|
205 |
-
response =
|
206 |
-
|
207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
)
|
209 |
st.markdown(str(response))
|
|
|
4 |
from PIL import Image
|
5 |
from llama_index import (
|
6 |
Document,
|
7 |
+
GPTVectorStoreIndex,
|
8 |
GPTListIndex,
|
9 |
LLMPredictor,
|
10 |
ServiceContext,
|
11 |
SimpleDirectoryReader,
|
12 |
PromptHelper,
|
13 |
+
StorageContext,
|
14 |
+
load_index_from_storage,
|
15 |
+
download_loader,
|
16 |
)
|
17 |
+
from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
|
18 |
|
19 |
from constants import DEFAULT_TERM_STR, DEFAULT_TERMS, REFINE_TEMPLATE, TEXT_QA_TEMPLATE
|
20 |
from utils import get_llm
|
|
|
26 |
|
27 |
@st.cache_resource
|
28 |
def get_file_extractor():
|
29 |
+
ImageReader = download_loader("ImageReader")
|
30 |
+
image_loader = ImageReader(text_type="plain_text")
|
31 |
+
file_extractor = DEFAULT_FILE_READER_CLS
|
32 |
file_extractor.update(
|
33 |
{
|
34 |
+
".jpg": image_loader,
|
35 |
+
".png": image_loader,
|
36 |
+
".jpeg": image_loader,
|
37 |
}
|
38 |
)
|
39 |
|
|
|
56 |
|
57 |
temp_index = GPTListIndex.from_documents(documents, service_context=service_context)
|
58 |
terms_definitions = str(
|
59 |
+
temp_index.as_query_engine(response_mode="tree_summarize").query(
|
60 |
+
term_extract_str
|
61 |
+
)
|
62 |
)
|
63 |
terms_definitions = [
|
64 |
x
|
|
|
89 |
|
90 |
service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm))
|
91 |
|
92 |
+
index = load_index_from_storage(
|
93 |
+
StorageContext.from_defaults(persist_dir="./initial_index"),
|
94 |
+
service_context=service_context,
|
95 |
)
|
96 |
|
97 |
return index
|
|
|
209 |
query_text = st.text_input("Ask about a term or definition:")
|
210 |
if query_text:
|
211 |
with st.spinner("Generating answer..."):
|
212 |
+
response = (
|
213 |
+
st.session_state["llama_index"]
|
214 |
+
.as_query_engine(
|
215 |
+
similarity_top_k=5,
|
216 |
+
response_mode="compact",
|
217 |
+
text_qa_template=TEXT_QA_TEMPLATE,
|
218 |
+
refine_template=REFINE_TEMPLATE,
|
219 |
+
)
|
220 |
+
.query(query_text)
|
221 |
)
|
222 |
st.markdown(str(response))
|
constants.py
CHANGED
@@ -13,7 +13,7 @@ DEFAULT_TEXT_QA_PROMPT_TMPL = (
|
|
13 |
"---------------------\n"
|
14 |
"{context_str}"
|
15 |
"\n---------------------\n"
|
16 |
-
"Given the context information answer the following question "
|
17 |
"(if you don't know the answer, use the best of your knowledge): {query_str}\n"
|
18 |
)
|
19 |
TEXT_QA_TEMPLATE = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)
|
@@ -29,6 +29,7 @@ DEFAULT_REFINE_PROMPT_TMPL = (
|
|
29 |
"------------\n"
|
30 |
"Given the new context and using the best of your knowledge, improve the existing answer. "
|
31 |
"If you can't improve the existing answer, just repeat it again. "
|
|
|
32 |
"Do not mention that you've read the above context."
|
33 |
)
|
34 |
DEFAULT_REFINE_PROMPT = RefinePrompt(DEFAULT_REFINE_PROMPT_TMPL)
|
@@ -44,6 +45,7 @@ CHAT_REFINE_PROMPT_TMPL_MSGS = [
|
|
44 |
"------------\n"
|
45 |
"Given the new context and using the best of your knowledge, improve the existing answer. "
|
46 |
"If you can't improve the existing answer, just repeat it again. "
|
|
|
47 |
"Do not mention that you've read the above context."
|
48 |
),
|
49 |
]
|
@@ -56,9 +58,7 @@ DEFAULT_REFINE_PROMPT_SEL_LC = ConditionalPromptSelector(
|
|
56 |
default_prompt=DEFAULT_REFINE_PROMPT.get_langchain_prompt(),
|
57 |
conditionals=[(is_chat_model, CHAT_REFINE_PROMPT.get_langchain_prompt())],
|
58 |
)
|
59 |
-
REFINE_TEMPLATE = RefinePrompt(
|
60 |
-
langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC
|
61 |
-
)
|
62 |
|
63 |
DEFAULT_TERM_STR = (
|
64 |
"Make a list of terms and definitions that are defined in the context, "
|
|
|
13 |
"---------------------\n"
|
14 |
"{context_str}"
|
15 |
"\n---------------------\n"
|
16 |
+
"Given the context information, directly answer the following question "
|
17 |
"(if you don't know the answer, use the best of your knowledge): {query_str}\n"
|
18 |
)
|
19 |
TEXT_QA_TEMPLATE = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)
|
|
|
29 |
"------------\n"
|
30 |
"Given the new context and using the best of your knowledge, improve the existing answer. "
|
31 |
"If you can't improve the existing answer, just repeat it again. "
|
32 |
+
"Do not include un-needed or un-helpful information that is shown in the new context. "
|
33 |
"Do not mention that you've read the above context."
|
34 |
)
|
35 |
DEFAULT_REFINE_PROMPT = RefinePrompt(DEFAULT_REFINE_PROMPT_TMPL)
|
|
|
45 |
"------------\n"
|
46 |
"Given the new context and using the best of your knowledge, improve the existing answer. "
|
47 |
"If you can't improve the existing answer, just repeat it again. "
|
48 |
+
"Do not include un-needed or un-helpful information that is shown in the new context. "
|
49 |
"Do not mention that you've read the above context."
|
50 |
),
|
51 |
]
|
|
|
58 |
default_prompt=DEFAULT_REFINE_PROMPT.get_langchain_prompt(),
|
59 |
conditionals=[(is_chat_model, CHAT_REFINE_PROMPT.get_langchain_prompt())],
|
60 |
)
|
61 |
+
REFINE_TEMPLATE = RefinePrompt(langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC)
|
|
|
|
|
62 |
|
63 |
DEFAULT_TERM_STR = (
|
64 |
"Make a list of terms and definitions that are defined in the context, "
|
index.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
initial_index/docstore.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
initial_index/index_store.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"index_store/data": {"894d0818-364d-47d0-8241-3d5bcfc37908": {"__type__": "vector_store", "__data__": {"index_id": "894d0818-364d-47d0-8241-3d5bcfc37908", "summary": null, "nodes_dict": {"ec371470-e24a-461c-8462-aab6aef3b298": "ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25": "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e": "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e": "150d5ea1-3e98-471f-8074-37c5a5068e9e", "a4a6684a-715e-440c-94eb-960720407380": "a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398": "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233": "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20": "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490": "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd": "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80": "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0": "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e": "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1": "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d": "52bfbf28-911d-423a-9f55-55ac218cee0d", "bfbeb775-00d3-4023-987a-85cacb733f58": "bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe": "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366": "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869": "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad": "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad": "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2": "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c": 
"23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8": "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242": "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50": "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65": "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99": "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea": "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7": "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055": "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304": "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606": "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150": "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317": "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78": "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba": "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985": "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f": "1f22e89c-95b8-440b-9940-15fbc6b7202f"}, "doc_id_dict": {"a708f585-999f-4072-b4e9-e5acfd0afd3d": ["ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e", "a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d", 
"bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f"]}, "embeddings_dict": {}}}}}
|
initial_index/vector_store.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -1,6 +1,16 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
streamlit==1.19.0
|
6 |
-
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==4.2.2
|
2 |
+
langchain==0.0.154
|
3 |
+
llama-index==0.6.13
|
4 |
+
numpy==1.24.3
|
5 |
+
openai==0.27.7
|
6 |
+
pandas==2.0.2
|
7 |
+
Pillow==9.5.0
|
8 |
+
pytesseract==0.3.10
|
9 |
+
sentencepiece==0.1.99
|
10 |
+
SQLAlchemy==2.0.15
|
11 |
streamlit==1.19.0
|
12 |
+
streamlit-chat==0.0.2.2
|
13 |
+
tokenizers==0.13.3
|
14 |
+
torch==2.0.1
|
15 |
+
torchvision==0.15.2
|
16 |
+
transformers==4.29.2
|