karshreya98 committed on
Commit
e48d908
•
1 Parent(s): 2e4daca

added changes to sync with master

Browse files
.idea/.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (search_mlReply)" project-jdk-type="Python SDK" />
4
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/search_mlReply.iml" filepath="$PROJECT_DIR$/.idea/search_mlReply.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/search_mlReply.iml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$">
5
+ <excludeFolder url="file://$MODULE_DIR$/venv" />
6
+ </content>
7
+ <orderEntry type="inheritedJdk" />
8
+ <orderEntry type="sourceFolder" forTests="false" />
9
+ </component>
10
+ </module>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
.streamlit/config.toml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [theme]
2
+ primaryColor = "#E694FF"
3
+ backgroundColor = "#FFFFFF"
4
+ secondaryBackgroundColor = "#F0F0F0"
5
+ textColor = "#262730"
6
+ font = "sans-serif"
app.py CHANGED
@@ -7,21 +7,12 @@ from annotated_text import annotation
7
  from json import JSONDecodeError
8
  from markdown import markdown
9
  from utils.config import parser
10
- from utils.haystack import start_document_store, start_haystack_extractive, start_haystack_rag, query, start_preprocessor_node, start_retriever, start_reader
11
  from utils.ui import reset_results, set_initial_state
12
-
13
- # Sliders
14
- DEFAULT_DOCS_FROM_RETRIEVER = int(os.getenv("DEFAULT_DOCS_FROM_RETRIEVER", "3"))
15
- DEFAULT_NUMBER_OF_ANSWERS = int(os.getenv("DEFAULT_NUMBER_OF_ANSWERS", "3"))
16
-
17
- # Labels for the evaluation
18
- #EVAL_LABELS = os.getenv("EVAL_FILE", str(Path(__file__).parent / "eval_labels_volksbank_QA.csv"))
19
 
20
  # Whether the file upload should be enabled or not
21
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
22
- UPLOAD_DOCUMENTS = []
23
-
24
-
25
 
26
  # Define a function to handle file uploads
27
  def upload_files():
@@ -58,23 +49,33 @@ def process_file(data_file, preprocesor, document_store):
58
 
59
  try:
60
  args = parser.parse_args()
61
-
62
- set_initial_state()
63
- st.write('# '+args.name)
64
- session_state = st.session_state
65
-
66
  preprocesor = start_preprocessor_node()
67
- document_store = start_document_store(args.store)
68
  retriever = start_retriever(document_store)
69
  reader = start_reader()
70
- if args.task == 'extractive':
71
- pipeline = start_haystack_extractive(document_store, retriever, reader)
72
- else:
73
- pipeline = start_haystack_rag(document_store, retriever)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- # Sidebar
76
 
77
- #st.sidebar.header("Options")
78
 
79
  # File upload block
80
  if not DISABLE_FILE_UPLOAD:
@@ -97,66 +98,102 @@ try:
97
  st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
98
  st.sidebar.write("_This file could not be parsed, see the logs for more information._")
99
 
100
-
101
-
102
  # Search bar
103
- question = st.text_input("Ask a question", value=st.session_state.question, max_chars=100, on_change=reset_results)
104
- # question = "what is Pi?"
105
 
106
  run_pressed = st.button("Run")
107
- # run_pressed = True
108
 
109
  run_query = (
110
- run_pressed or question != st.session_state.question
111
  )
112
 
113
  # Get results for query
114
  if run_query and question:
115
- reset_results()
116
- st.session_state.question = question
117
- with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
118
- try:
119
- st.session_state.results = query(pipeline, question)
120
- except JSONDecodeError as je:
121
- st.error(
122
- "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
123
- )
124
- except Exception as e:
125
- logging.exception(e)
126
- st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
127
-
128
-
129
-
130
- if st.session_state.results:
131
- results = st.session_state.results
132
-
133
- if args.task == 'extractive':
134
- answers = results['answers']
135
- for count, answer in enumerate(answers):
136
- if answer.answer:
137
- text, context = answer.answer, answer.context
138
- start_idx = context.find(text)
139
- end_idx = start_idx + len(text)
140
- st.write(
141
- f" Answer: {markdown(context[:start_idx] + str(annotation(body=text, label='ANSWER', background='#964448', color='#ffffff')) + context[end_idx:])}",
142
- unsafe_allow_html=True,
143
  )
144
- else:
145
- st.info(
146
- "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
 
 
 
 
 
 
 
 
 
 
 
147
  )
148
- elif args.task == 'rag':
149
- st.write(f" Answer: {results['results'][0]}")
150
-
151
- # Extract and display information from the 'documents' list
152
- retrieved_documents = results['documents']
153
- st.subheader("Retriever Results:")
154
- for document in retrieved_documents:
155
- st.write(f"Document Name: {document.meta['name']}")
156
- st.write(f"Score: {document.score}")
157
- st.write(f"Text: {document.content}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  except SystemExit as e:
159
- # This exception will be raised if --help or invalid command line arguments
160
- # are used. Currently streamlit prevents the program from exiting normally
161
- # so we have to do a hard exit.
162
- os._exit(e.code)
 
7
  from json import JSONDecodeError
8
  from markdown import markdown
9
  from utils.config import parser
10
+ from utils.haystack import start_document_store, query, initialize_pipeline, start_preprocessor_node, start_retriever, start_reader
11
  from utils.ui import reset_results, set_initial_state
12
+ import pandas as pd
 
 
 
 
 
 
13
 
14
  # Whether the file upload should be enabled or not
15
  DISABLE_FILE_UPLOAD = bool(os.getenv("DISABLE_FILE_UPLOAD"))
 
 
 
16
 
17
  # Define a function to handle file uploads
18
  def upload_files():
 
49
 
50
  try:
51
  args = parser.parse_args()
52
+ #session_state = st.session_state
 
 
 
 
53
  preprocesor = start_preprocessor_node()
54
+ document_store = start_document_store(type=args.store)
55
  retriever = start_retriever(document_store)
56
  reader = start_reader()
57
+ st.set_page_config(
58
+ page_title="test",
59
+ layout="centered",
60
+ page_icon = (":shark:"),
61
+ menu_items={
62
+ 'Get Help': 'https://www.extremelycoolapp.com/help',
63
+ 'Report a bug': "https://www.extremelycoolapp.com/bug",
64
+ 'About': "# This is a header. This is an *extremely* cool app!"
65
+ })
66
+ st.sidebar.image("ml_logo.png", use_column_width=True)
67
+
68
+ # Sidebar for Task Selection
69
+ st.sidebar.header('Options:')
70
+ task_selection = st.sidebar.radio('Select the task:', ['Extractive', 'Generative'])
71
+
72
+ pipeline_rag = initialize_pipeline("rag", document_store, retriever, reader)
73
+ pipeline_extractive = initialize_pipeline("extractive", document_store, retriever, reader)
74
+
75
+ set_initial_state()
76
 
77
+ st.write('# ' + args.name)
78
 
 
79
 
80
  # File upload block
81
  if not DISABLE_FILE_UPLOAD:
 
98
  st.sidebar.write(str(data_file.name) + " &nbsp;&nbsp; ❌ ")
99
  st.sidebar.write("_This file could not be parsed, see the logs for more information._")
100
 
101
+ if "question" not in st.session_state:
102
+ st.session_state.question = ""
103
  # Search bar
104
+ question = st.text_input("", value=st.session_state.question, max_chars=100, on_change=reset_results)
 
105
 
106
  run_pressed = st.button("Run")
 
107
 
108
  run_query = (
109
+ run_pressed or question != st.session_state.question #or task_selection != st.session_state.task
110
  )
111
 
112
  # Get results for query
113
  if run_query and question:
114
+ if task_selection == 'Extractive':
115
+ reset_results()
116
+ st.session_state.question = question
117
+ with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
118
+ try:
119
+ st.session_state.results_extractive = query(pipeline_extractive, question)
120
+ st.session_state.task = task_selection
121
+ except JSONDecodeError as je:
122
+ st.error(
123
+ "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  )
125
+ except Exception as e:
126
+ logging.exception(e)
127
+ st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
128
+
129
+ elif task_selection == 'Generative':
130
+ reset_results()
131
+ st.session_state.question = question
132
+ with st.spinner("🔎 &nbsp;&nbsp; Running your pipeline"):
133
+ try:
134
+ st.session_state.results_generative = query(pipeline_rag, question)
135
+ st.session_state.task = task_selection
136
+ except JSONDecodeError as je:
137
+ st.error(
138
+ "👓 &nbsp;&nbsp; An error occurred reading the results. Is the document store working?"
139
  )
140
+ except Exception as e:
141
+ logging.exception(e)
142
+ st.error("🐞 &nbsp;&nbsp; An error occurred during the request.")
143
+
144
+ # Display results
145
+ if (st.session_state.results_extractive or st.session_state.results_generative) and run_query:
146
+
147
+ # Handle Extractive Answers
148
+ if task_selection == 'Extractive':
149
+ results = st.session_state.results_extractive
150
+
151
+ st.subheader("Extracted Answers:")
152
+
153
+ if 'answers' in results:
154
+ answers = results['answers']
155
+ treshold = 0.2
156
+ higher_then_treshold = any(ans.score > treshold for ans in answers)
157
+ if not higher_then_treshold:
158
+ st.markdown(f"<span style='color:red'>Please note none of the answers achieved a score higher then {int(treshold) * 100}%. Which probably means that the desired answer is not in the searched documents.</span>", unsafe_allow_html=True)
159
+ for count, answer in enumerate(answers):
160
+ if answer.answer:
161
+ text, context = answer.answer, answer.context
162
+ start_idx = context.find(text)
163
+ end_idx = start_idx + len(text)
164
+ score = round(answer.score, 3)
165
+ st.markdown(f"**Answer {count + 1}:**")
166
+ st.markdown(
167
+ context[:start_idx] + str(annotation(body=text, label=f'SCORE {score}', background='#964448', color='#ffffff')) + context[end_idx:],
168
+ unsafe_allow_html=True,
169
+ )
170
+ else:
171
+ st.info(
172
+ "🤔 &nbsp;&nbsp; Haystack is unsure whether any of the documents contain an answer to your question. Try to reformulate it!"
173
+ )
174
+
175
+ # Handle Generative Answers
176
+ elif task_selection == 'Generative':
177
+ results = st.session_state.results_generative
178
+ st.subheader("Generated Answer:")
179
+ if 'results' in results:
180
+ st.markdown("**Answer:**")
181
+ st.write(results['results'][0])
182
+
183
+ # Handle Retrieved Documents
184
+ if 'documents' in results:
185
+ retrieved_documents = results['documents']
186
+ st.subheader("Retriever Results:")
187
+
188
+ data = []
189
+ for i, document in enumerate(retrieved_documents):
190
+ # Truncate the content
191
+ truncated_content = (document.content[:150] + '...') if len(document.content) > 150 else document.content
192
+ data.append([i + 1, document.meta['name'], truncated_content])
193
+
194
+ # Convert data to DataFrame and display using Streamlit
195
+ df = pd.DataFrame(data, columns=['Ranked Context', 'Document Name', 'Content'])
196
+ st.table(df)
197
+
198
  except SystemExit as e:
199
+ os._exit(e.code)
 
 
 
ml_logo.png ADDED
utils/__pycache__/config.cpython-38.pyc ADDED
Binary file (1.51 kB). View file
 
utils/__pycache__/haystack.cpython-38.pyc ADDED
Binary file (3.73 kB). View file
 
utils/__pycache__/ui.cpython-38.pyc ADDED
Binary file (733 Bytes). View file
 
utils/config.py CHANGED
@@ -9,7 +9,7 @@ parser = argparse.ArgumentParser(description='This app lists animals')
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  task_choices = ('extractive', 'rag')
11
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
12
- parser.add_argument('--task', choices=task_choices, default='rag', help='Task selection (default: %(default)s)')
13
  parser.add_argument('--name', default="My Search App")
14
 
15
  model_configs = {
 
9
  document_store_choices = ('inmemory', 'weaviate', 'milvus', 'opensearch')
10
  task_choices = ('extractive', 'rag')
11
  parser.add_argument('--store', choices=document_store_choices, default='inmemory', help='DocumentStore selection (default: %(default)s)')
12
+ #parser.add_argument('--task', choices=task_choices, default='rag', help='Task selection (default: %(default)s)')
13
  parser.add_argument('--name', default="My Search App")
14
 
15
  model_configs = {
utils/haystack.py CHANGED
@@ -63,6 +63,7 @@ def start_document_store(type: str):
63
  return_embedding=True)
64
  return document_store
65
 
 
66
  @st.cache_resource(show_spinner=False)
67
  def start_retriever(_document_store: BaseDocumentStore):
68
  print('initializing retriever')
@@ -104,8 +105,14 @@ def start_haystack_rag(_document_store: BaseDocumentStore, _retriever: Embedding
104
 
105
  return pipe
106
 
107
- @st.cache_data(show_spinner=True)
108
  def query(_pipeline, question):
109
  params = {}
110
  results = _pipeline.run(question, params=params)
111
- return results
 
 
 
 
 
 
 
63
  return_embedding=True)
64
  return document_store
65
 
66
+ # cached to make index and models load only at start
67
  @st.cache_resource(show_spinner=False)
68
  def start_retriever(_document_store: BaseDocumentStore):
69
  print('initializing retriever')
 
105
 
106
  return pipe
107
 
108
+ #@st.cache_data(show_spinner=True)
109
  def query(_pipeline, question):
110
  params = {}
111
  results = _pipeline.run(question, params=params)
112
+ return results
113
+
114
+ def initialize_pipeline(task, document_store, retriever, reader):
115
+ if task == 'extractive':
116
+ return start_haystack_extractive(document_store, retriever, reader)
117
+ elif task == 'rag':
118
+ return start_haystack_rag(document_store, retriever)
utils/ui.py CHANGED
@@ -6,7 +6,11 @@ def set_state_if_absent(key, value):
6
 
7
  def set_initial_state():
8
  set_state_if_absent("question", "Ask something here?")
9
- set_state_if_absent("results", None)
 
 
10
 
11
  def reset_results(*args):
12
- st.session_state.results = None
 
 
 
6
 
7
  def set_initial_state():
8
  set_state_if_absent("question", "Ask something here?")
9
+ set_state_if_absent("results_extractive", None)
10
+ set_state_if_absent("results_generative", None)
11
+ set_state_if_absent("task", None)
12
 
13
  def reset_results(*args):
14
+ st.session_state.results_extractive = None
15
+ st.session_state.results_generative = None
16
+ st.session_state.task = None