Spaces:

Abhilashvj
/

haystack_QA

Runtime error

App Files Files Community

abhi001vj committed on Dec 23, 2022

Commit

005d125

•

1 Parent(s): 9d9cdb3

added the required app

Browse files

Files changed (2) hide show

app.py +175 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import os
+import sys
+import logging
+from pathlib import Path
+from json import JSONDecodeError
+import pandas as pd
+import streamlit as st
+from annotated_text import annotation
+from markdown import markdown
+import json
+from haystack import Document
+import pandas as pd
+from haystack.document_stores import PineconeDocumentStore
+from haystack.nodes import EmbeddingRetriever, FARMReader
+from haystack.pipelines import ExtractiveQAPipeline
+@st.cache
+def create_doc_store():
+    """Build the Pinecone-backed document store used by the demo.
+
+    The API key is read from the Streamlit secret ``pinecone_apikey``; the
+    store targets the ``qa_demo`` index with cosine similarity over
+    768-dimensional embeddings. The result is memoised by ``st.cache``.
+    """
+    return PineconeDocumentStore(
+        api_key=st.secrets["pinecone_apikey"],
+        index="qa_demo",
+        similarity="cosine",
+        embedding_dim=768,
+    )
+@st.cache
+def create_pipe(document_store):
+    """Assemble the extractive QA pipeline on top of ``document_store``.
+
+    An ``EmbeddingRetriever`` (sentence-transformers model) fetches candidate
+    documents and a CPU-only ``FARMReader`` extracts answers from them. The
+    retriever is constructed before the reader, and the finished
+    ``ExtractiveQAPipeline`` is returned. The result is memoised by ``st.cache``.
+    """
+    dense_retriever = EmbeddingRetriever(
+        document_store=document_store,
+        embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
+        model_format="sentence_transformers",
+    )
+    answer_reader = FARMReader(
+        model_name_or_path="deepset/roberta-base-squad2",
+        use_gpu=False,
+    )
+    return ExtractiveQAPipeline(answer_reader, dense_retriever)
+def query(pipe, question, top_k_reader, top_k_retriever):
+    """Run ``question`` through ``pipe`` and return the raw pipeline result.
+
+    Args:
+        pipe: Object exposing ``run(query=..., params=...)`` (the QA pipeline).
+        question: The user's question text.
+        top_k_reader: ``top_k`` passed to the pipeline's "Reader" node.
+        top_k_retriever: ``top_k`` passed to the pipeline's "Retriever" node.
+
+    Returns:
+        Whatever ``pipe.run`` returns, unmodified. The caller in this file
+        reads its ``"answers"`` entry.
+    """
+    return pipe.run(
+        query=question,
+        params={
+            "Retriever": {"top_k": top_k_retriever},
+            "Reader": {"top_k": top_k_reader},
+        },
+    )
+def set_state_if_absent(key, value):
+    """Store ``value`` under ``key`` in the session state unless ``key`` is already set."""
+    if key not in st.session_state:
+        st.session_state[key] = value
+# Adjust to a question that you would like users to see in the search bar when they load the UI:
+DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "My blog post discusses remote work. Give me statistics.")
+DEFAULT_ANSWER_AT_STARTUP = os.getenv("DEFAULT_ANSWER_AT_STARTUP", "7% more remote workers have been at their current organization for 5 years or fewer")
+# Sliders
+DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
+DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))
+# Streamlit requires set_page_config to be the first Streamlit command of the
+# script, so it runs before the cached loaders below.
+# NOTE(review): the loaders may draw a spinner while they run - confirm.
+st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")
+# Build the document store once, then the pipeline on top of it. The pipeline
+# must receive the store instance, not the factory function itself.
+document_store = create_doc_store()
+pipe = create_pipe(document_store)
+# Persistent state. "results" and "raw_json" are initialised as well so that
+# code reading them before the first query does not hit a missing key.
+set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
+set_state_if_absent("answer", DEFAULT_ANSWER_AT_STARTUP)
+set_state_if_absent("results", None)
+set_state_if_absent("raw_json", None)
+# Widget callback: clears everything derived from the previous query so stale
+# output is not shown after the question or a slider changes.
+def reset_results(*args):
+    """Set the ``answer``, ``results`` and ``raw_json`` session entries to ``None``."""
+    for state_key in ("answer", "results", "raw_json"):
+        st.session_state[state_key] = None
+# Page heading followed by a short description of what the demo does.
+st.write("# Haystack Demo - Explore the world")
+intro_markdown = """
+This demo takes its data from two sample data csv with statistics on various topics
+Ask any question on this topic and see if Haystack can find the correct answer to your query!
+*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
+"""
+st.markdown(intro_markdown, unsafe_allow_html=True)
+# Sidebar: the two top-k controls. Both sliders use the same 1-10 integer range
+# and clear previously stored results whenever their value changes.
+st.sidebar.header("Options")
+_top_k_slider_kwargs = dict(min_value=1, max_value=10, step=1, on_change=reset_results)
+top_k_reader = st.sidebar.slider(
+    "Max. number of answers",
+    value=DEFAULT_NUMBER_OF_ANSWERS,
+    **_top_k_slider_kwargs,
+)
+top_k_retriever = st.sidebar.slider(
+    "Max. number of documents from retriever",
+    value=DEFAULT_DOCS_FROM_RETRIEVER,
+    **_top_k_slider_kwargs,
+)
+# NOTE: a CSV file-uploader used to be sketched here as commented-out code. It
+# called an `upload_doc` helper that is not defined in the visible part of this
+# file, so it stays disabled.
+# Question box: pre-filled from session state, capped at 100 characters, with a
+# hidden label; editing it clears the stored results.
+question = st.text_input(
+    "question",
+    value=st.session_state.question,
+    max_chars=100,
+    on_change=reset_results,
+    label_visibility="hidden",
+)
+# Two equal columns; each one receives the CSS rule that stretches buttons to full width.
+col1, col2 = st.columns(2)
+for column in (col1, col2):
+    column.markdown("<style>.stButton button {width:100%;}</style>", unsafe_allow_html=True)
+# Run button
+run_pressed = col1.button("Run")
+# Get results for the query. Only runs when the button was pressed and the
+# question is non-empty.
+if run_pressed and question:
+    reset_results()
+    st.session_state.question = question
+    with st.spinner(
+        "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "
+        "Do you want to optimize speed or accuracy? \n"
+        "Check out the docs: https://haystack.deepset.ai/usage/optimization "
+    ):
+        try:
+            # `query` takes the pipeline as its first argument.
+            st.session_state.results = query(
+                pipe, question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever
+            )
+        except JSONDecodeError:
+            st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
+            # `return` is a SyntaxError at module level; st.stop() ends this
+            # script run instead.
+            st.stop()
+        except Exception as e:
+            logging.exception(e)
+            if "The server is busy processing requests" in str(e) or "503" in str(e):
+                st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
+            else:
+                st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
+            st.stop()
+# Render the stored answers, if any. `.get` is used so that a session in which
+# "results" was never set (e.g. first page load) renders nothing rather than raising.
+results = st.session_state.get("results")
+if results:
+    st.write("## Results:")
+    for result in results["answers"]:
+        answer, context = result.answer, result.context
+        meta = result.meta or {}
+        # NOTE(review): the exact metadata key names stored in the index are not
+        # visible here; both capitalisations are tried - confirm against the data.
+        title = meta.get("Title") or meta.get("title") or "Unknown source"
+        link = meta.get("link") or meta.get("Link")
+        source = f"[{title}]({link})" if link else title
+        if not answer:
+            # Nothing to highlight when the reader produced no answer text.
+            st.info("🤔 &nbsp;&nbsp; No answer could be extracted for this result. Try to reformulate your question!")
+            continue
+        context = context or answer
+        start_idx = context.find(answer)
+        if start_idx < 0:
+            # Answer text is not a literal substring of the context: show the
+            # whole context followed by the highlighted answer instead of
+            # slicing with -1 (which would drop and duplicate characters).
+            start_idx = end_idx = len(context)
+        else:
+            end_idx = start_idx + len(answer)
+        # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
+        st.write(
+            markdown(f'**Source:** {source} \n {context[:start_idx] } {str(annotation(answer, "ANSWER", "#8ef"))} {context[end_idx:]} \n '),
+            unsafe_allow_html=True,
+        )
+        # The result is read via attributes above, so the relevance is derived
+        # from its `score` attribute rather than subscripting the object.
+        # NOTE(review): assumes `score` is a 0-1 float - confirm.
+        relevance = round(result.score * 100, 2) if result.score is not None else "n/a"
+        st.markdown(f"**Relevance:** {relevance} -  **Source:** {source}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+streamlit
+farm-haystack[pinecone]
+pinecone-client
+datasets
+st-annotated-text
+markdown