abhi001vj committed on
Commit
005d125
•
1 Parent(s): 9d9cdb3

added the required app

Browse files
Files changed (2) hide show
  1. app.py +175 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import logging
4
+ from pathlib import Path
5
+ from json import JSONDecodeError
6
+ import pandas as pd
7
+ import streamlit as st
8
+ from annotated_text import annotation
9
+ from markdown import markdown
10
+ import json
11
+ from haystack import Document
12
+ import pandas as pd
13
+ from haystack.document_stores import PineconeDocumentStore
14
+ from haystack.nodes import EmbeddingRetriever, FARMReader
15
+ from haystack.pipelines import ExtractiveQAPipeline
16
+
17
@st.cache
def create_doc_store():
    """Build the Pinecone-backed document store used by the demo.

    The Pinecone API key is read from the ``pinecone_apikey`` entry of
    Streamlit's secrets. The function is wrapped in ``st.cache`` so the
    store does not have to be rebuilt on every script rerun.

    Returns:
        The configured ``PineconeDocumentStore`` instance.
    """
    # NOTE(review): Pinecone index names may not accept underscores - confirm
    # that "qa_demo" is a valid name for the target Pinecone project.
    # NOTE(review): 768 presumably matches the output size of the embedding
    # model configured in create_pipe - confirm.
    store_settings = {
        "api_key": st.secrets["pinecone_apikey"],
        "index": "qa_demo",
        "similarity": "cosine",
        "embedding_dim": 768,
    }
    return PineconeDocumentStore(**store_settings)
26
+
27
@st.cache
def create_pipe(document_store):
    """Assemble the extractive question-answering pipeline.

    An ``EmbeddingRetriever`` bound to *document_store* is created first,
    then a CPU-only ``FARMReader``; both are combined into an
    ``ExtractiveQAPipeline``.

    Args:
        document_store: The document store the retriever searches in.

    Returns:
        The ready-to-run ``ExtractiveQAPipeline``.
    """
    # NOTE(review): st.cache hashes its arguments; confirm the document store
    # object can be hashed by the installed Streamlit version.
    doc_retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
        model_format="sentence_transformers",
    )
    answer_reader = FARMReader(
        model_name_or_path="deepset/roberta-base-squad2",
        use_gpu=False,
    )
    return ExtractiveQAPipeline(answer_reader, doc_retriever)
37
+
38
def query(pipe, question, top_k_reader, top_k_retriever):
    """Run *question* through *pipe* and return the raw pipeline result.

    Args:
        pipe: Any object exposing ``run(query=..., params=...)``; in this app
            it is the pipeline built by ``create_pipe``.
        question: The natural-language question to answer.
        top_k_reader: Value forwarded as the ``Reader`` node's ``top_k``.
        top_k_retriever: Value forwarded as the ``Retriever`` node's ``top_k``.

    Returns:
        Whatever ``pipe.run`` returns, unmodified.
    """
    # Per-node parameters are keyed by the node names used inside the pipeline.
    node_params = {
        "Retriever": {"top_k": top_k_retriever},
        "Reader": {"top_k": top_k_reader},
    }
    return pipe.run(query=question, params=node_params)
51
+
52
# Build the document store once and wire the QA pipeline to it.
document_store = create_doc_store()
# The pipeline factory expects the document store instance; passing the
# factory function itself would hand the retriever a function object.
pipe = create_pipe(document_store)
54
+
55
def set_state_if_absent(key, value):
    """Store *value* under *key* in the Streamlit session state unless the key exists.

    Args:
        key: Session-state key to initialise.
        value: Value assigned only when *key* is not present yet.
    """
    if key in st.session_state:
        # Already initialised: keep the existing value untouched.
        return
    st.session_state[key] = value
58
+
59
# Question and answer shown in the UI when it is first loaded; both can be
# overridden through environment variables of the same name.
DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "My blog post discusses remote work. Give me statistics.")
DEFAULT_ANSWER_AT_STARTUP = os.getenv("DEFAULT_ANSWER_AT_STARTUP", "7% more remote workers have been at their current organization for 5 years or fewer")

# Sliders: initial positions, also overridable through the environment.
DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))


st.set_page_config(page_title="Haystack Demo", page_icon="https://haystack.deepset.ai/img/HaystackIcon.png")

# Persistent state
set_state_if_absent("question", DEFAULT_QUESTION_AT_STARTUP)
set_state_if_absent("answer", DEFAULT_ANSWER_AT_STARTUP)
# "results" and "raw_json" are read/reset elsewhere in the script; initialise
# them too so reading them on the very first page load cannot fail.
set_state_if_absent("results", None)
set_state_if_absent("raw_json", None)
73
+
74
+
75
def reset_results(*args):
    """Clear the stored answer, results and raw JSON from the session state.

    Used as an ``on_change`` callback so stale output disappears when the
    question or a slider changes. Positional arguments are accepted and
    ignored.
    """
    for stale_key in ("answer", "results", "raw_json"):
        st.session_state[stale_key] = None
80
+
81
# Page heading and introduction text
st.write("# Haystack Demo - Explore the world")
st.markdown(
    """
This demo takes its data from two sample data csv with statistics on various topics
Ask any question on this topic and see if Haystack can find the correct answer to your query!
*Note: do not use keywords, but full-fledged questions.* The demo is not optimized to deal with keyword queries and might misunderstand you.
""",
    unsafe_allow_html=True,
)


def _top_k_slider(label, default):
    """Add a 1-10 integer slider to the sidebar that resets results on change."""
    return st.sidebar.slider(
        label,
        min_value=1,
        max_value=10,
        value=default,
        step=1,
        on_change=reset_results,
    )


# Sidebar options
st.sidebar.header("Options")
top_k_reader = _top_k_slider("Max. number of answers", DEFAULT_NUMBER_OF_ANSWERS)
top_k_retriever = _top_k_slider("Max. number of documents from retriever", DEFAULT_DOCS_FROM_RETRIEVER)

# NOTE: a CSV uploader was sketched here as commented-out code; it relied on
# an `upload_doc` helper that is not defined in the visible source.

# Question box; its label is kept but hidden from view.
question = st.text_input(
    label="question",
    label_visibility="hidden",
    value=st.session_state.question,
    max_chars=100,
    on_change=reset_results,
)

# Two equal columns; the injected CSS stretches buttons to the column width.
col1, col2 = st.columns(2)
full_width_button_css = "<style>.stButton button {width:100%;}</style>"
for column in (col1, col2):
    column.markdown(full_width_button_css, unsafe_allow_html=True)
128
+
129
def _run_search(question, top_k_reader, top_k_retriever):
    """Run the QA pipeline for *question* and keep the outcome in session state.

    Previous results are cleared first. On failure an error banner is shown
    and ``st.session_state.results`` stays ``None``, so nothing is rendered.
    """
    reset_results()
    st.session_state.question = question

    with st.spinner(
        "🧠 &nbsp;&nbsp; Performing neural search on documents... \n "
        "Do you want to optimize speed or accuracy? \n"
        "Check out the docs: https://haystack.deepset.ai/usage/optimization "
    ):
        try:
            # `query` takes the pipeline as its first positional argument.
            st.session_state.results = query(
                pipe, question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever
            )
        except JSONDecodeError:
            st.error("👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?")
        except Exception as e:
            # Top-level UI boundary: log the full traceback, show a short message.
            logging.exception(e)
            if "The server is busy processing requests" in str(e) or "503" in str(e):
                st.error("🧑‍🌾 &nbsp;&nbsp; All our workers are busy! Try again later.")
            else:
                st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")


def _render_answer(result):
    """Render one answer: its context with the answer highlighted, plus its source."""
    answer, context = result.answer, result.context
    if not answer or not context:
        # NOTE(review): presumably the reader can emit "no answer" entries
        # with an empty answer and no context - confirm against Haystack.
        st.info("No answer could be extracted for this result. Try to reformulate your question!")
        return

    meta = result.meta or {}
    # NOTE(review): the metadata schema is not visible here; both spellings
    # of the link key are tried so a casing mismatch does not crash the page.
    link = meta.get("link") or meta.get("Link") or ""
    source = f"[{meta.get('Title', '')}]({link})"

    start_idx = context.find(answer)
    if start_idx < 0:
        # Answer text not found verbatim in the context: show it unannotated
        # instead of slicing with -1, which would garble the output.
        highlighted = context
    else:
        end_idx = start_idx + len(answer)
        highlighted = f'{context[:start_idx] } {str(annotation(answer, "ANSWER", "#8ef"))} {context[end_idx:]}'

    # Hack due to this bug: https://github.com/streamlit/streamlit/issues/3190
    st.write(
        markdown(f'**Source:** {source} \n {highlighted} \n '),
        unsafe_allow_html=True,
    )
    # Answers are read by attribute everywhere else in this block; the
    # confidence lives in `score`.
    st.markdown(f"**Relevance:** {result.score} - **Source:** {source}")


# Run button
run_pressed = col1.button("Run")

# Get results for query (only when the button was pressed and a question exists)
if run_pressed and question:
    _run_search(question, top_k_reader, top_k_retriever)

# Guard with a membership test so a first page load, where "results" may not
# have been stored yet, does not raise.
if "results" in st.session_state and st.session_state.results:

    st.write("## Results:")

    for result in st.session_state.results['answers']:
        _render_answer(result)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ streamlit
2
+ farm-haystack[pinecone]
3
+ pinecone-client
4
+ datasets