cheesyFishes committed
Commit 1d0af05 • 1 Parent(s): 8c9ef31

update to llama-index v0.6.13

app.py CHANGED
@@ -4,14 +4,17 @@ import streamlit as st
 from PIL import Image
 from llama_index import (
     Document,
-    GPTSimpleVectorIndex,
+    GPTVectorStoreIndex,
     GPTListIndex,
     LLMPredictor,
     ServiceContext,
     SimpleDirectoryReader,
     PromptHelper,
+    StorageContext,
+    load_index_from_storage,
+    download_loader,
 )
-from llama_index.readers.file.base import DEFAULT_FILE_EXTRACTOR, ImageParser
+from llama_index.readers.file.base import DEFAULT_FILE_READER_CLS
 
 from constants import DEFAULT_TERM_STR, DEFAULT_TERMS, REFINE_TEMPLATE, TEXT_QA_TEMPLATE
 from utils import get_llm
@@ -23,13 +26,14 @@ if "all_terms" not in st.session_state:
 
 @st.cache_resource
 def get_file_extractor():
-    image_parser = ImageParser(keep_image=True, parse_text=True)
-    file_extractor = DEFAULT_FILE_EXTRACTOR
+    ImageReader = download_loader("ImageReader")
+    image_loader = ImageReader(text_type="plain_text")
+    file_extractor = DEFAULT_FILE_READER_CLS
     file_extractor.update(
         {
-            ".jpg": image_parser,
-            ".png": image_parser,
-            ".jpeg": image_parser,
+            ".jpg": image_loader,
+            ".png": image_loader,
+            ".jpeg": image_loader,
         }
     )
 
@@ -52,7 +56,9 @@ def extract_terms(documents, term_extract_str, llm_name, model_temperature, api_
 
     temp_index = GPTListIndex.from_documents(documents, service_context=service_context)
     terms_definitions = str(
-        temp_index.query(term_extract_str, response_mode="tree_summarize")
+        temp_index.as_query_engine(response_mode="tree_summarize").query(
+            term_extract_str
+        )
     )
     terms_definitions = [
         x
@@ -83,8 +89,9 @@ def initialize_index(llm_name, model_temperature, api_key):
 
     service_context = ServiceContext.from_defaults(llm_predictor=LLMPredictor(llm=llm))
 
-    index = GPTSimpleVectorIndex.load_from_disk(
-        "./index.json", service_context=service_context
+    index = load_index_from_storage(
+        StorageContext.from_defaults(persist_dir="./initial_index"),
+        service_context=service_context,
     )
 
     return index
@@ -202,8 +209,14 @@ with query_tab:
     query_text = st.text_input("Ask about a term or definition:")
     if query_text:
         with st.spinner("Generating answer..."):
-            response = st.session_state["llama_index"].query(
-                query_text, similarity_top_k=5, response_mode="compact",
-                text_qa_template=TEXT_QA_TEMPLATE, refine_template=REFINE_TEMPLATE
+            response = (
+                st.session_state["llama_index"]
+                .as_query_engine(
+                    similarity_top_k=5,
+                    response_mode="compact",
+                    text_qa_template=TEXT_QA_TEMPLATE,
+                    refine_template=REFINE_TEMPLATE,
+                )
+                .query(query_text)
             )
             st.markdown(str(response))
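
The headline breaking change in llama-index 0.6.x, visible throughout this diff, is that indices no longer expose query() directly; retrieval and response settings now live on a query engine. A minimal before/after sketch of the pattern (the sample document text is illustrative, and an OPENAI_API_KEY is assumed to be set):

    from llama_index import Document, GPTListIndex

    # illustrative one-document index
    index = GPTListIndex.from_documents([Document("A node is a chunk of a source document.")])

    # llama-index 0.5.x (no longer available in 0.6.x):
    #   response = index.query("What is a node?", response_mode="tree_summarize")

    # llama-index 0.6.x: configure a query engine first, then query it
    query_engine = index.as_query_engine(response_mode="tree_summarize")
    response = query_engine.query("What is a node?")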
 
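ImageParser likewise left the core library in the 0.6 line; image support now comes from the Llama Hub ImageReader loader, fetched at runtime with download_loader. A rough standalone sketch (the image path is hypothetical, and the load_data signature is an assumption; text_type="plain_text" selects pytesseract-based OCR, which appears to be why pytesseract, torch, and transformers show up in requirements.txt below):

    from pathlib import Path
    from llama_index import download_loader

    ImageReader = download_loader("ImageReader")  # pulls the loader code from Llama Hub
    loader = ImageReader(text_type="plain_text")  # plain-text OCR via pytesseract
    documents = loader.load_data(file=Path("./example.png"))  # hypothetical image file
    print(documents[0].text)
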
constants.py CHANGED
@@ -13,7 +13,7 @@ DEFAULT_TEXT_QA_PROMPT_TMPL = (
     "---------------------\n"
     "{context_str}"
     "\n---------------------\n"
-    "Given the context information answer the following question "
+    "Given the context information, directly answer the following question "
     "(if you don't know the answer, use the best of your knowledge): {query_str}\n"
 )
 TEXT_QA_TEMPLATE = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)
@@ -29,6 +29,7 @@ DEFAULT_REFINE_PROMPT_TMPL = (
     "------------\n"
     "Given the new context and using the best of your knowledge, improve the existing answer. "
     "If you can't improve the existing answer, just repeat it again. "
+    "Do not include un-needed or un-helpful information that is shown in the new context. "
     "Do not mention that you've read the above context."
 )
 DEFAULT_REFINE_PROMPT = RefinePrompt(DEFAULT_REFINE_PROMPT_TMPL)
@@ -44,6 +45,7 @@ CHAT_REFINE_PROMPT_TMPL_MSGS = [
     "------------\n"
     "Given the new context and using the best of your knowledge, improve the existing answer. "
     "If you can't improve the existing answer, just repeat it again. "
+    "Do not include un-needed or un-helpful information that is shown in the new context. "
     "Do not mention that you've read the above context."
     ),
 ]
@@ -56,9 +58,7 @@ DEFAULT_REFINE_PROMPT_SEL_LC = ConditionalPromptSelector(
     default_prompt=DEFAULT_REFINE_PROMPT.get_langchain_prompt(),
     conditionals=[(is_chat_model, CHAT_REFINE_PROMPT.get_langchain_prompt())],
 )
-REFINE_TEMPLATE = RefinePrompt(
-    langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC
-)
+REFINE_TEMPLATE = RefinePrompt(langchain_prompt_selector=DEFAULT_REFINE_PROMPT_SEL_LC)
 
 DEFAULT_TERM_STR = (
     "Make a list of terms and definitions that are defined in the context, "
 
index.json DELETED
The diff for this file is too large to render. See raw diff
 
initial_index/docstore.json ADDED
The diff for this file is too large to render. See raw diff
 
initial_index/index_store.json ADDED
@@ -0,0 +1 @@
+ {"index_store/data": {"894d0818-364d-47d0-8241-3d5bcfc37908": {"__type__": "vector_store", "__data__": {"index_id": "894d0818-364d-47d0-8241-3d5bcfc37908", "summary": null, "nodes_dict": {"ec371470-e24a-461c-8462-aab6aef3b298": "ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25": "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e": "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e": "150d5ea1-3e98-471f-8074-37c5a5068e9e", "a4a6684a-715e-440c-94eb-960720407380": "a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398": "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233": "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20": "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490": "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd": "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80": "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0": "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e": "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1": "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d": "52bfbf28-911d-423a-9f55-55ac218cee0d", "bfbeb775-00d3-4023-987a-85cacb733f58": "bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe": "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366": "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869": "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad": "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad": "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2": "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c": "23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8": "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242": "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50": "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65": "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99": "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea": "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7": "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055": "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304": "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606": "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150": "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317": "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78": "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba": "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985": "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f": "1f22e89c-95b8-440b-9940-15fbc6b7202f"}, "doc_id_dict": {"a708f585-999f-4072-b4e9-e5acfd0afd3d": ["ec371470-e24a-461c-8462-aab6aef3b298", "2f0b5eab-e6ce-4861-b04e-48e464d11c25", "010d0eb6-5e7d-45a5-ab2d-2efc2546be6e", "150d5ea1-3e98-471f-8074-37c5a5068e9e", 
"a4a6684a-715e-440c-94eb-960720407380", "360346f6-5868-433e-b8ae-ae373f1b1398", "94ecc25d-c7e8-4d78-a6b1-32391049f233", "2bfe8914-bca7-4df9-9a47-81db053b1f20", "401e1c3e-559e-44c0-80a2-279a94030490", "793d54ec-17ca-48dc-aba0-ba66e84d29cd", "a33209a0-3195-4e12-b39d-861eed88dc80", "25ec0f90-0bf9-4e2c-ab5f-e55592e505b0", "65938051-881c-4f6c-aa4a-d5678629c67e", "adb2be9f-4ab0-4a0d-92ba-e13c101899e1", "52bfbf28-911d-423a-9f55-55ac218cee0d", "bfbeb775-00d3-4023-987a-85cacb733f58", "95ab52b8-c1f7-4287-b535-4e94c0c05fbe", "e00e4430-8d8c-4f7f-a631-5aa42178b366", "323da6be-33d9-48c6-a478-cadae3676869", "8210491f-670f-4c4f-8764-4dda73c51cad", "cbc458ea-5094-47ac-b480-2ebd8bd4bfad", "e62d3fd9-bb4e-49d2-a33c-ebbd54479bb2", "23f3ca55-4d90-4371-8110-b5bee7df7e0c", "9379b995-7bf4-4cfe-a734-55d0c53f6fb8", "04a9bd09-91f1-4eb9-ac16-439a5facb242", "72cb6e83-5c98-491b-9c5d-4086f7ee7e50", "b9fa2947-bfaf-43d2-a6ca-c2b62818cb65", "519dba62-2b32-4262-aceb-709d3bfaff99", "5e5c1b21-c385-41b7-ad05-8154d0e527ea", "95e60817-e54a-4de3-a2ee-2247f64016c7", "0e7b6a6b-68b0-4cb5-af47-ed6145c43055", "7144e1fb-dfbc-4666-90d0-002fcaf3a304", "a1163e41-e5f6-419a-884e-d1857eb9e606", "f098a7cd-68c1-4b51-a568-4e9dce983150", "031420d8-c23b-4903-ab64-e667e1964317", "d6511485-85c8-4d56-8a0e-fe9ccd6b4b78", "75d9a8f3-fd4d-4187-ada2-329ec74d6aba", "b92fab54-0c5e-4c9d-85b7-1ecb8d665985", "1f22e89c-95b8-440b-9940-15fbc6b7202f"]}, "embeddings_dict": {}}}}}
initial_index/vector_store.json ADDED
The diff for this file is too large to render. See raw diff
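
The monolithic index.json disappears because 0.6 splits persistence across a StorageContext: the docstore.json, index_store.json, and vector_store.json added under initial_index/ are its three components. A sketch of the round trip that produces and reloads such a directory, using the same calls app.py now makes (document text is illustrative; an OPENAI_API_KEY is assumed for embedding):

    from llama_index import (
        Document,
        GPTVectorStoreIndex,
        StorageContext,
        load_index_from_storage,
    )

    # build and persist: writes docstore.json, index_store.json, vector_store.json
    index = GPTVectorStoreIndex.from_documents([Document("A node is a chunk of a document.")])
    index.storage_context.persist(persist_dir="./initial_index")

    # reload later, as initialize_index() in app.py now does
    index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./initial_index")
    )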
 
requirements.txt CHANGED
@@ -1,6 +1,16 @@
-langchain==0.0.128
-llama-index==0.5.4
-Pillow==9.4.0
-sentencepiece
+altair==4.2.2
+langchain==0.0.154
+llama-index==0.6.13
+numpy==1.24.3
+openai==0.27.7
+pandas==2.0.2
+Pillow==9.5.0
+pytesseract==0.3.10
+sentencepiece==0.1.99
+SQLAlchemy==2.0.15
 streamlit==1.19.0
-torch==1.13.0
+streamlit-chat==0.0.2.2
+tokenizers==0.13.3
+torch==2.0.1
+torchvision==0.15.2
+transformers==4.29.2
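
Because the 0.5.x and 0.6.x APIs are mutually incompatible, it's worth confirming the installed pins before running the app; a small check along these lines (the package list here is illustrative):

    import pkg_resources

    for name in ("llama-index", "langchain", "torch", "pytesseract"):
        print(name, pkg_resources.get_distribution(name).version)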