Engr-Saeed committed on
Commit
d76f892
1 Parent(s): 4fc4079

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ import docx2txt
4
+ import json
5
+ import pandas as pd
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ import os
8
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
+ import google.generativeai as genai
10
+ from langchain.vectorstores import FAISS
11
+ from langchain_google_genai import ChatGoogleGenerativeAI
12
+ from langchain.chains.question_answering import load_qa_chain
13
+ from langchain.prompts import PromptTemplate
14
+ from dotenv import load_dotenv
15
+
16
+ # Step 2: Load environment variable
17
# Load variables from a .env file (if one is present) before reading the key.
load_dotenv()
api_key = os.environ.get("GOOGLE_API_KEY")

# Step 3: Configure Google_API
# api_key is None when GOOGLE_API_KEY is not set in the environment.
genai.configure(api_key=api_key)
22
+
23
+ # Step 4: Function to read files and extract text
24
def extract_text(file):
    """Return the textual content of an uploaded file.

    The format is chosen from the extension of ``file.name``, compared
    case-insensitively. Supported extensions are ``.pdf``, ``.docx``,
    ``.txt``, ``.csv``, ``.xlsx`` and ``.json``.

    Args:
        file: A binary file-like object that exposes a ``name`` attribute.

    Returns:
        str: The extracted text, or ``""`` when the extension is not one of
        the supported ones.
    """
    # Normalise once so "REPORT.PDF" is handled the same as "report.pdf".
    filename = file.name.lower()

    if filename.endswith(".pdf"):
        # A page may yield no text (None or ""), e.g. when it has no text
        # layer; fall back to "" so joining never fails.
        return "".join(
            page.extract_text() or "" for page in PdfReader(file).pages
        )
    if filename.endswith(".docx"):
        return docx2txt.process(file)
    if filename.endswith(".txt"):
        return file.read().decode("utf-8")
    if filename.endswith(".csv"):
        return pd.read_csv(file).to_string()
    if filename.endswith(".xlsx"):
        return pd.read_excel(file).to_string()
    if filename.endswith(".json"):
        # Re-serialise so the text is consistently pretty-printed.
        return json.dumps(json.load(file), indent=4)

    # Unsupported extension: nothing to extract.
    return ""
44
+
45
+ # Step 5: Function to convert text into chunks
46
def get_text_chunks(text, chunk_size=10000, chunk_overlap=1000):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full text to split.
        chunk_size: Maximum size of each chunk, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 10000.
        chunk_overlap: Overlap between consecutive chunks, passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.

    Returns:
        list[str]: The chunks, or an empty list when ``text`` is empty or
        contains only whitespace.
    """
    # Nothing meaningful to split; skip building the splitter entirely.
    if not text or not text.strip():
        return []

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)
50
+
51
+ # Step 6: Function for converting chunks into embeddings and saving the FAISS index
52
def get_vector_store(text_chunks):
    """Embed the chunks and save a FAISS index to the ``faiss_index`` folder.

    Args:
        text_chunks: Non-empty list of text chunks to embed and index.

    Raises:
        ValueError: If ``text_chunks`` is empty, since there is nothing to
            embed or index.
    """
    # Fail early with a clear message instead of an obscure error from the
    # vector store when there is nothing to index.
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the uploaded files contained no "
            "extractable text."
        )

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)

    # Ensure the directory exists (exist_ok avoids a check-then-create race).
    os.makedirs("faiss_index", exist_ok=True)

    vector_store.save_local("faiss_index")
    print("FAISS index saved successfully.")
62
+
63
+ # Step 7: Function to implement Gemini-Pro Model
64
def get_conversational_chain():
    """Build the question-answering chain used to answer from retrieved context.

    Returns:
        The chain produced by ``load_qa_chain`` with chain type ``"stuff"``,
        the ``gemini-pro`` chat model (temperature 0.3) and a prompt that
        takes ``context`` and ``question`` variables.
    """
    # The template body is kept flush-left so that no source indentation
    # ends up inside the prompt text.
    qa_template = """
Answer the question as detailed as possible from the provided context. If the answer is not in
the provided context, just say, "The answer is not available in the context." Do not provide a wrong answer.\n\n
Context:\n {context}\n
Question: \n{question}\n

Answer:
"""
    llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0.3)
    qa_prompt = PromptTemplate(
        template=qa_template,
        input_variables=["context", "question"],
    )
    return load_qa_chain(llm, chain_type="stuff", prompt=qa_prompt)
77
+
78
+ # Step 8: Function to take inputs from user and generate response
79
def user_input(user_question):
    """Answer a question using the saved FAISS index and the QA chain.

    Args:
        user_question: The question text entered by the user.

    Returns:
        str: The chain's ``output_text``, or a short instruction message
        when no index has been saved to ``faiss_index`` yet.
    """
    # Without a saved index there is nothing to search; tell the user what
    # to do instead of failing while loading a missing file.
    index_file = os.path.join("faiss_index", "index.faiss")
    if not os.path.exists(index_file):
        return "No indexed documents found. Please upload and process files first."

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    # NOTE(review): allow_dangerous_deserialization=True permits pickle-based
    # loading. Acceptable only while "faiss_index" is written by this app
    # itself; never point this at an index from an untrusted source.
    new_db = FAISS.load_local(
        "faiss_index", embeddings, allow_dangerous_deserialization=True
    )
    docs = new_db.similarity_search(user_question)
    chain = get_conversational_chain()
    response = chain(
        {"input_documents": docs, "question": user_question},
        return_only_outputs=True,
    )
    return response["output_text"]
86
+
87
+ # Step 9: Streamlit App
88
def main():
    """Render the Streamlit page: a question box and a file-upload sidebar.

    When a question is entered it is answered via ``user_input``. When the
    sidebar button is pressed, the uploaded files are converted to text,
    chunked and indexed; an error is shown if no files were uploaded or if
    the files contained no extractable text.
    """
    st.set_page_config(page_title="RAG Chatbot")
    st.header("Chat with Multiple Files using RAG and Gemini ")

    user_question = st.text_input("Ask a Question")

    if user_question:
        with st.spinner("Processing your question..."):
            response = user_input(user_question)
            st.write("Reply: ", response)

    with st.sidebar:
        st.title("Upload Files:")
        uploaded_files = st.file_uploader(
            "Upload your files",
            accept_multiple_files=True,
            type=["pdf", "docx", "txt", "csv", "xlsx", "json"],
        )
        if st.button("Submit & Process"):
            if not uploaded_files:
                st.error("Please upload at least one file.")
            else:
                with st.spinner("Processing files..."):
                    combined_text = "".join(
                        extract_text(file) + "\n" for file in uploaded_files
                    )
                    text_chunks = get_text_chunks(combined_text)
                    if not text_chunks:
                        # e.g. image-only PDFs: there is nothing to index,
                        # so report it instead of failing during indexing.
                        st.error("No readable text was found in the uploaded files.")
                    else:
                        get_vector_store(text_chunks)
                        st.success("Files processed and indexed successfully!")


if __name__ == "__main__":
    main()