cheesyFishes committed
Commit 1d0af05 • 1 Parent(s): 8c9ef31

update to llama-index v0.6.13

app.py CHANGED
@@ -4,14 +4,17 @@ import streamlit as st
 from PIL import Image
 from llama_index import (
     Document,
-    GPTSimpleVectorIndex,
+    GPTVectorStoreIndex,
     GPTListIndex,
     LLMPredictor,
     ServiceContext,
     SimpleDirectoryReader,
     PromptHelper,
+    StorageContext,
+    load_index_from_storage,
+    download_loader,
 )
-from llama_index.readers.file.base import DEFAULT_FILE_EXTRACTOR, ImageParser
+from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
 
 from constants import DEFAULT_TERM_STR, DEFAULT_TERMS, REFINE_TEMPLATE, TEXT_QA_TEMPLATE
 from utils import get_llm
@@ -23,13 +26,14 @@ if "all_terms" not in st.session_state:
 
 @st.cache_resource
 def get_file_extractor():
-    image_parser = ImageParser(keep_image=True, parse_text=True)
-    file_extractor = DEFAULT_FILE_EXTRACTOR
+    ImageReader = download_loader("ImageReader")
+    image_loader = ImageReader(text_type="plain_text")
+    file_extractor = DEFAULT_FILE_READER_CLS
     file_extractor.update(
         {
-            ".jpg": image_parser,
-            ".png": image_parser,
-            ".jpeg": image_parser,
+            ".jpg": image_loader,
+            ".png": image_loader,
+            ".jpeg": image_loader,
         }
     )
 
@@ -52,7 +56,9 @@ def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_
 
     temp_index = GPTListIndex.from_documents(documents, service_context=service_context)
     terms_definitions = str(
-        temp_index.query(term_extract_str, response_mode="tree_summarize")
+        temp_index.as_query_engine(response_mode="tree_summarize").query(
+            term_extract_str
+        )
     )
     terms_definitions = [
         x
@@ -83,8 +89,9 @@ def initialize_index(llm_name, model_temperature, api_key):
 
     service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm))
 
-    index = GPTSimpleVectorIndex.load_from_disk(
-        "./index.json", service_context=service_context
+    index = load_index_from_storage(
+        StorageContext.from_defaults(persist_dir="./initial_index"),
+        service_context=service_context,
     )
 
     return index
@@ -202,8 +209,14 @@ with query_tab:
     query_text = st.text_input("Ask about a term or definition:")
     if query_text:
         with st.spinner("Generating answer..."):
-            response = st.session_state["llama_index"].query(
-                query_text, similarity_top_k=5, response_mode="compact",
-                text_qa_template=TEXT_QA_TEMPLATE, refine_template=REFINE_TEMPLATE
+            response = (
+                st.session_state["llama_index"]
+                .as_query_engine(
+                    similarity_top_k=5,
+                    response_mode="compact",
+                    text_qa_template=TEXT_QA_TEMPLATE,
+                    refine_template=REFINE_TEMPLATE,
+                )
+                .query(query_text)
             )
             st.markdown(str(response))
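
The headline breaking change in llama-index 0.6.x, visible throughout this diff, is that indices no longer expose query() directly; retrieval and response settings now live on a query engine. A minimal before/after sketch of the pattern (the sample document text is illustrative, and an OPENAI_API_KEY is assumed to be set):

    from llama_index import Document, GPTListIndex

    # illustrative one-document index
    index = GPTListIndex.from_documents([Document("A node is a chunk of a source document.")])

    # llama-index 0.5.x (no longer available in 0.6.x):
    #   response = index.query("What is a node?", response_mode="tree_summarize")

    # llama-index 0.6.x: configure a query engine first, then query it
    query_engine = index.as_query_engine(response_mode="tree_summarize")
    response = query_engine.query("What is a node?")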
 
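ImageParser likewise left the core library in the 0.6 line; image support now comes from the Llama Hub ImageReader loader, fetched at runtime with download_loader. A rough standalone sketch (the image path is hypothetical, and the load_data signature is an assumption; text_type="plain_text" selects pytesseract-based OCR, which appears to be why pytesseract, torch, and transformers show up in requirements.txt below):

    from pathlib import Path
    from llama_index import download_loader

    ImageReader = download_loader("ImageReader")  # pulls the loader code from Llama Hub
    loader = ImageReader(text_type="plain_text")  # plain-text OCR via pytesseract
    documents = loader.load_data(file=Path("./example.png"))  # hypothetical image file
    print(documents[0].text)
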
constants.py CHANGED
@@ -13,7 +13,7 @@ DEFAULT_TEXT_QA_PROMPT_TMPL = (
     "---------------------\n"
     "{context_str}"
     "\n---------------------\n"
-    "Given the context information answer the following question "
+    "Given the context information, directly answer the following question "
     "(if you don't know the answer, use the best of your knowledge): {query_str}\n"
 )
 TEXT_QA_TEMPLATE = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)
@@ -29,6 +29,7 @@ DEFAULT_REFINE_PROMPT_TMPL = (
     "------------\n"
     "Given the new context and using the best of your knowledge, improve the existing answer. "
     "If you can't improve the existing answer, just repeat it again. "
+    "Do not include un-needed or un-helpful information that is shown in the new context. "
     "Do not mention that you've read the above context."
 )
 DEFAULT_REFINE_PROMPT = RefinePrompt(DEFAULT_REFINE_PROMPT_TMPL)
@@ -44,6 +45,7 @@ CHAT_REFINE_PROMPT_TMPL_MSGS = [
     "------------\n"
     "Given the new context and using the best of your knowledge, improve the existing answer. "
     "If you can't improve the existing answer, just repeat it again. "
+    "Do not include un-needed or un-helpful information that is shown in the new context. "
     "Do not mention that you've read the above context."
     ),
 ]
@@ -56,9 +58,7 @@ DEFAULT_REFINE_PROMPT_SEL_LC = ConditionalPromptSelector(
     default_prompt=DEFAULT_REFINE_PROMPT.get_langchain_prompt(),
     conditionals=[(is_chat_model, CHAT_REFINE_PROMPT.get_langchain_prompt())],
 )
-REFINE_TEMPLATE = RefinePrompt(
-    langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC
-)
+REFINE_TEMPLATE = RefinePrompt(langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC)
 
 DEFAULT_TERM_STR = (
     "Make a list of terms and definitions that are defined in the context, "
 
index.json DELETED
The diff for this file is too large to render. See raw diff
 
initial_index/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
initial_index/index_store.json ADDED
@@ -0,0 +1 @@
+ {"index_store/data": {"894d0818-364d-47d0-8241-3d5bcfc37908": {"__type__": "vector_store", "__data__": {"index_id": "894d0818-364d-47d0-8241-3d5bcfc37908", "summary": null, "nodes_dict": {"ec371470-e24a-461c-8462-aab6aef3b298": "ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25": "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e": "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e": "150d5ea1-3e98-471f-8074-37c5a5068e9e", "a4a6684a-715e-440c-94eb-960720407380": "a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398": "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233": "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20": "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490": "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd": "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80": "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0": "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e": "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1": "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d": "52bfbf28-911d-423a-9f55-55ac218cee0d", "bfbeb775-00d3-4023-987a-85cacb733f58": "bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe": "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366": "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869": "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad": "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad": "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2": "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c": "23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8": "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242": "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50": "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65": "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99": "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea": "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7": "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055": "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304": "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606": "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150": "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317": "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78": "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba": "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985": "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f": "1f22e89c-95b8-440b-9940-15fbc6b7202f"}, "doc_id_dict": {"a708f585-999f-4072-b4e9-e5acfd0afd3d": ["ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e", 
"a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d", "bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f"]}, "embeddings_dict": {}}}}}
initial_index/vector_store.json ADDED
The diff for this file is too large to render. See raw diff
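
The monolithic index.json disappears because 0.6 splits persistence across a StorageContext: the docstore.json, index_store.json, and vector_store.json added under initial_index/ are its three components. A sketch of the round trip that produces and reloads such a directory, using the same calls app.py now makes (document text is illustrative; an OPENAI_API_KEY is assumed for embedding):

    from llama_index import (
        Document,
        GPTVectorStoreIndex,
        StorageContext,
        load_index_from_storage,
    )

    # build and persist: writes docstore.json, index_store.json, vector_store.json
    index = GPTVectorStoreIndex.from_documents([Document("A node is a chunk of a document.")])
    index.storage_context.persist(persist_dir="./initial_index")

    # reload later, as initialize_index() in app.py now does
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./initial_index")
    )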
 
requirements.txt CHANGED
@@ -1,6 +1,16 @@
-langchain==0.0.128
-llama-index==0.5.4
-Pillow==9.4.0
-sentencepiece
+altair==4.2.2
+langchain==0.0.154
+llama-index==0.6.13
+numpy==1.24.3
+openai==0.27.7
+pandas==2.0.2
+Pillow==9.5.0
+pytesseract==0.3.10
+sentencepiece==0.1.99
+SQLAlchemy==2.0.15
 streamlit==1.19.0
-torch==1.13.0
+streamlit-chat==0.0.2.2
+tokenizers==0.13.3
+torch==2.0.1
+torchvision==0.15.2
+transformers==4.29.2
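
Because the 0.5.x and 0.6.x APIs are mutually incompatible, it's worth confirming the installed pins before running the app; a small check along these lines (the package list here is illustrative):

    import pkg_resources

    for name in ("llama-index", "langchain", "torch", "pytesseract"):
        print(name, pkg_resources.get_distribution(name).version)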