BishanSingh246 committed on
Commit
22894bf
1 Parent(s): d302573

First Commit

Browse files
Future Wealth Gain Brochure.pdf ADDED
Binary file (653 kB). View file
 
Smart Protect Goal Brochure.pdf ADDED
Binary file (733 kB). View file
 
app.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This module provides functions for working with PDF files and URLs. It uses the urllib.request library
3
+ to download files from URLs, the fitz library to extract text from PDF files, and the OpenAI API to generate
4
+ text completions.
5
+ """
6
+ import urllib.request
7
+ import fitz
8
+ import re
9
+ import numpy as np
10
+ import tensorflow_hub as hub
11
+ import openai
12
+ import gradio as gr
13
+ import os
14
+ from sklearn.neighbors import NearestNeighbors
15
+ from pdfQuestions import dataFromPDF
16
+
17
def download_pdf(url, output_path):
    """Fetch the resource at ``url`` and store it on disk at ``output_path``.

    Args:
        url: Location of the file to download.
        output_path: Local filesystem path the downloaded bytes are written to.
    """
    urllib.request.urlretrieve(url, filename=output_path)
19
+
20
+
21
def preprocess(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Newlines, tabs and repeated spaces are all replaced by one space.
    Leading and trailing whitespace is collapsed but not removed.

    Args:
        text: Raw text, e.g. the text extracted from one PDF page.

    Returns:
        The text with all whitespace runs normalised to a single space.
    """
    # A raw string keeps "\s" a regex escape rather than an (invalid) string
    # escape. "\s" already matches "\n", so no separate newline replacement
    # is needed.
    return re.sub(r'\s+', ' ', text)
25
+
26
+
27
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract whitespace-normalised text from a range of pages of a PDF.

    Args:
        path: Path of the PDF file to open.
        start_page: 1-based number of the first page to read.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` or a value beyond the end of the document means
            "through the last page".

    Returns:
        A list with one preprocessed text string per page, in page order.
    """
    doc = fitz.open(path)
    try:
        total_pages = doc.page_count
        # Clamp instead of failing on a page number past the end of the file.
        if end_page is None or end_page > total_pages:
            end_page = total_pages

        return [
            preprocess(doc.load_page(page_index).get_text("text"))
            for page_index in range(start_page - 1, end_page)
        ]
    finally:
        # Release the document even when page extraction raises.
        doc.close()
43
+
44
+
45
def text_to_chunks(texts, word_length=150, start_page=1):
    """Split per-page texts into chunks of at most ``word_length`` words.

    Each chunk is rendered as ``[<page number>] "<words>"``. A short trailing
    chunk of a page is not emitted on its own; its words are carried over to
    the front of the next page (and labelled with that page's number). Only
    the last page may therefore end with a chunk shorter than ``word_length``.

    Args:
        texts: List of page texts, one string per page.
        word_length: Maximum number of words per chunk.
        start_page: Page number assigned to ``texts[0]``.

    Returns:
        A list of chunk strings. Pages without any words contribute nothing,
        so the result is empty when no page contains text.
    """
    # split() (no argument) drops the empty tokens that split(' ') produces
    # for leading/trailing spaces, so they neither count as words nor leave
    # double spaces inside merged chunks.
    pages = [text.split() for text in texts]
    last_page = len(pages) - 1
    chunks = []

    for idx, words in enumerate(pages):
        for i in range(0, len(words), word_length):
            chunk = words[i:i + word_length]
            # A short chunk can only be the tail of the page; push it onto
            # the next page so chunks stay close to word_length words.
            if len(chunk) < word_length and idx != last_page:
                pages[idx + 1] = chunk + pages[idx + 1]
                continue
            body = ' '.join(chunk)
            chunks.append(f'[{idx + start_page}] "{body}"')
    return chunks
61
+
62
+
63
class SemanticSearch:
    """Nearest-neighbour text search over sentence-encoder embeddings.

    Usage: create an instance, call :meth:`fit` with a list of text chunks,
    then call the instance with a query string to get the closest chunks.
    """

    def __init__(self):
        # Universal Sentence Encoder v4, loaded through TensorFlow Hub.
        self.use = hub.load('https://tfhub.dev/google/universal-sentence-encoder/4')
        self.fitted = False

    def fit(self, data, batch=1000, n_neighbors=5):
        """Embed ``data`` and build the nearest-neighbour index over it.

        Args:
            data: Sequence of text chunks to index.
            batch: Number of chunks embedded per encoder call.
            n_neighbors: Number of results returned per query; capped at the
                number of chunks.

        Raises:
            ValueError: If ``data`` is empty.
        """
        # Mark the index unusable until the refit has fully succeeded, so a
        # failure half-way cannot leave a mix of old and new state in use.
        self.fitted = False
        if len(data) == 0:
            raise ValueError('Cannot fit SemanticSearch on an empty corpus.')

        self.data = data
        self.embeddings = self.get_text_embedding(data, batch=batch)
        n_neighbors = min(n_neighbors, len(self.embeddings))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors)
        self.nn.fit(self.embeddings)
        self.fitted = True

    def __call__(self, text, return_data=True):
        """Return the indexed chunks closest to ``text``.

        Args:
            text: Query string.
            return_data: When true return the matching chunks themselves,
                otherwise return their indices into the fitted data.

        Raises:
            RuntimeError: If called before a successful :meth:`fit`.
        """
        if not getattr(self, 'fitted', False):
            raise RuntimeError('SemanticSearch must be fitted before it can be queried.')

        inp_emb = self.use([text])
        neighbors = self.nn.kneighbors(inp_emb, return_distance=False)[0]

        if return_data:
            return [self.data[i] for i in neighbors]
        return neighbors

    def get_text_embedding(self, texts, batch=1000):
        """Embed ``texts`` in batches of ``batch`` and stack the results row-wise."""
        embeddings = [
            self.use(texts[start:start + batch])
            for start in range(0, len(texts), batch)
        ]
        return np.vstack(embeddings)
97
+
98
+
99
+
100
def load_recommender(path, start_page=1):
    """Index the PDF at ``path`` in the module-level ``recommender``.

    The PDF text is read from ``start_page`` onwards, split into chunks and
    handed to ``recommender.fit``.

    Returns:
        The status string ``'Corpus Loaded.'``.
    """
    page_texts = pdf_to_text(path, start_page=start_page)
    recommender.fit(text_to_chunks(page_texts, start_page=start_page))
    return 'Corpus Loaded.'
106
+
107
def generate_text(openAI_key,prompt, engine="text-davinci-003"):
    """Request a single text completion for ``prompt`` from OpenAI.

    Args:
        openAI_key: API key; stored on the ``openai`` module before the call.
        prompt: Full prompt text sent to the completion endpoint.
        engine: Name of the completion engine to use.

    Returns:
        The text of the first (and only) returned choice.
    """
    # NOTE(review): this relies on the legacy ``openai.Completion`` interface
    # and the default engine name - confirm both are still available for the
    # installed client version.
    openai.api_key = openAI_key
    response = openai.Completion.create(
        engine=engine,
        prompt=prompt,
        max_tokens=512,
        n=1,
        stop=None,
        temperature=0.7,
    )
    first_choice = response.choices[0]
    return first_choice.text
119
+
120
def generate_answer(question,openAI_key):
    """Answer ``question`` from the chunks most similar to it in the index.

    The prompt consists of the retrieved chunks, a fixed instruction block
    and finally the query itself.

    Args:
        question: The user's question.
        openAI_key: API key forwarded to ``generate_text``.

    Returns:
        The completion text produced for the assembled prompt.
    """
    topn_chunks = recommender(question)

    search_results = ''.join(chunk + '\n\n' for chunk in topn_chunks)

    # The instruction text is a plain (non-f) string, so it must not contain
    # a "{question}" placeholder: it would reach the model verbatim. The real
    # query is appended exactly once below.
    instructions = (
        "Instructions: Compose a comprehensive reply to the query using the search results given. "
        "Cite each reference using [ Page Number] notation (every result has this number at the beginning). "
        "Citation should be done at the end of each sentence. If the search results mention multiple subjects "
        "with the same name, create separate answers for each. Only include information found in the results and "
        "don't add any additional information. Make sure the answer is correct and don't output false content. "
        "If the text does not relate to the query, simply state 'Text Not Found in PDF'. Ignore outlier "
        "search results which has nothing to do with the question. Only answer what is asked. The "
        "answer should be short and concise. Answer step-by-step. \n\n"
    )

    prompt = (
        'search results:\n\n'
        + search_results
        + instructions
        + f"Query: {question}\nAnswer:"
    )
    return generate_text(openAI_key, prompt,"text-davinci-003")
139
+
140
+
141
def question_answer(file,question,openAI_key):
    """Validate the UI inputs, index the selected brochure and answer the question.

    Args:
        file: Brochure name chosen in the UI (matched as a substring of the
            bundled PDF paths); may be ``None`` when nothing is selected.
        question: Question text; may be ``None`` when nothing is selected.
        openAI_key: OpenAI API key entered by the user.

    Returns:
        The generated answer, or a string starting with ``[ERROR]`` when an
        input is missing or the brochure is unknown.
    """
    if openAI_key is None or openAI_key.strip() == '':
        return '[ERROR]: Please enter you Open AI Key. Get your key here : https://platform.openai.com/account/api-keys'

    # Validate the cheap inputs before the expensive PDF indexing step.
    if question is None or question.strip() == '':
        return '[ERROR]: Question field is empty'

    if not file:
        return '[ERROR]: Please select a PDF file.'

    file_paths = ["./Smart Protect Goal Brochure.pdf", "./Future Wealth Gain Brochure.pdf"]
    matches = [path for path in file_paths if file in path]
    if not matches:
        # Never answer from a stale or unfitted index.
        return '[ERROR]: Selected PDF file is not available.'

    # When several paths match, the last one wins, as in a sequential load.
    load_recommender(matches[-1])

    return generate_answer(question,openAI_key)
157
+
158
def allQuestion(file):
    """Return a dropdown update listing the preset questions for ``file``.

    Args:
        file: Brochure name chosen in the UI, or ``None`` when nothing is
            selected.

    Returns:
        An error string when ``file`` is ``None``; otherwise a
        ``gr.Dropdown.update`` whose choices are the questions of the
        selected brochure (an empty list for an unrecognised name).
    """
    if file is None:
        return '[ERROR]: Provide select atleast one option.'

    questions_by_file = {
        "Smart Protect Goal Brochure": [
            "What are the various options under Life Cover Variant?",
            "What are the Add-on covers options under Variant description Life Cover?",
            "What is the total claim covered under Minor and Major CI?",
            "What is Waiver of Premium Benefit on CI?",
            "What is Annualized Premium under Life Cover Variant?",
            "Does the ROP include GST?",
            "Under what condition is Add-on Covers applicable?",
            "What is the duration period of premiums for CIB & WOPBI?",
            "What is the maximum maturity age with ROP under the variant?",
            "What is the maximum maturity age with Whole Life under the variant?",
        ],
        "Future Wealth Gain Brochure": [
            "What is Future Wealth Gain plan?",
            "If the customer has done a partial withdrawls, is he eligible for the Loyalty Additions/Fund Boosters?",
            "What are the steps to select the plan?",
            "What are the maturity benefits available in the wealth plus variant of this plan?",
            "How can one revive a discontinued policy?",
            'What are the tax benefit options available under this policy?',
            'What are the features under "Wealth Plus" & "Wealth Plus Care" Variant?',
            "Can I switch between the funds?",
        ],
    }

    # An unknown name yields an empty choice list rather than an
    # UnboundLocalError.
    return gr.Dropdown.update(choices=questions_by_file.get(file, []))
172
+
173
+
174
# Module-level search index shared by load_recommender() and generate_answer().
recommender = SemanticSearch()

# Static text and choices shown in the UI.
title = 'Madgical Chatbots - PDF GPT'
description = """ This chantbot will generate answer from PDF file :"""
options = ['Smart Protect Goal Brochure', 'Future Wealth Gain Brochure']

# Initial choices of the question dropdown; it is filled by allQuestion()
# once a brochure has been submitted.
newOption = None

# NOTE(review): the nesting of the layout blocks below was reconstructed from
# a flattened listing - confirm which groups belong inside the row.
with gr.Blocks() as demo:

    gr.Markdown(f'<center><h1>{title}</h1></center>')
    gr.Markdown(f'<center><h4>{description}</h4></center>')


    with gr.Row():

        # Input column: API key, brochure selection and question selection.
        with gr.Group():
            openAI_key=gr.Textbox(label='Enter your OpenAI API key here')
            file = gr.Dropdown(options, label="Select PDF file from below options")
            btn2 = gr.Button(value='Submit')
            question = gr.Dropdown(newOption,label="Select questions from below options")
            btn = gr.Button(value='Submit')
            btn.style(full_width=True)
            btn2.style(full_width=True)

        # Output boxes: generated answer and the stored reference answer.
        with gr.Group():
            answer = gr.Textbox(label='Medgical Chatbot Answer :')
        with gr.Group():
            pdfData = gr.Textbox(label='Answer from PDF :')

    # The question button triggers both the generated answer and the lookup
    # of the stored answer; the brochure button refreshes the question list.
    btn.click(question_answer, inputs=[file, question,openAI_key], outputs=[answer])
    btn.click(dataFromPDF, inputs=[question], outputs=[pdfData])
    btn2.click(allQuestion, inputs=[file], outputs=[question])

if __name__ == "__main__":
    demo.launch()
pdfQuestions.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Reference answers keyed by brochure name, then by the exact question text.
# Built once at import time instead of on every lookup. Multi-line answers are
# written as adjacent string literals so that no source indentation or stray
# backslash can leak into the displayed text.
_BROCHURE_ANSWERS = {
    'Smart Protect Goal Brochure': {
        "What are the various options under Life Cover Variant?": (
            'The following options are\n'
            '\u2022 Whole of Life Life Cover up to age 99 years : This option is only available when limited premium payment option is chosen\n'
            '\u2022 Return of Premiums (ROP) If no claim has been made for the Variant and/or for each Add-on cover(s), the total premiums paid for the Variant and/or each of the Add-on cover(s) opted for, will be returned on their respective maturity dates. If Whole Life is opted, ROP will not be available.'
        ),
        "What are the Add-on covers options under Variant description Life Cover?": (
            'The following Add-on covers\n'
            '\u2022 Accidental Death Benefit (ADB): In case of death due to an accident, Sum Assured chosen as ADB is payable.\n'
            '\u2022 Accidental Total Permanent Disability Benefit (ATPDB): In case of occurrence of total permanent disability of the Life Assured due to an accident, Sum Assured chosen as ATPDB is payable.\n'
            '\u2022 Critical Illness Benefit (CIB):\n'
            '\u2022 In case of diagnosis of any of the listed Critical Illnesses, Sum Assured chosen for Critical Illness benefit is payable.\n'
            '\u2022 In case of Minor CI other than Angioplasty, 25% of the CIB will be payable.\n'
            '\u2022 For Angioplasty, lower of 5 lakhs or 25% of CIB will be payable.\n'
            '\u2022 A maximum of four (4) Minor CI including Angioplasty claims will be payable during the CIB cover period.\n'
            '\u2022 For Major CI, 100% of CIB will be payable.'
        ),
        "What is the total claim covered under Minor and Major CI?": "The total claims paid under Minor and Major CI will not be more than 100% of CIB",
        "What is Waiver of Premium Benefit on CI?": "On occurrence of fourth (4 ) Minor CI or on the date of occurrence of the first (1st) Major CI (incl. ATPD), whichever is earlier, all future premiums due under the policy will be waived and WOPB-I cover will terminate immediately and automatically.",
        "What is Annualized Premium under Life Cover Variant?": "Annualized Premium is the total premium/s payable in a policy year for a LP, RP and the single premium for an SP. The annualized premium is exclusive of extra premium, add-on covers and loadings for modal premiums, if any, and Total Premiums paid shall be equal to (Annualized Premium * number of years for which premiums have been paid)",
        "Does the ROP include GST?": "ROP is excluding GST/any other applicable tax levied, subject to changes in tax laws and any extra premium.",
        "Under what condition is Add-on Covers applicable?": "Add-on Covers will only be applicable, subject to the conditions, exclusions, waiting period, cooling period &survival period as applicable",
        # Plain "&" here: the HTML entity residue "&amp" was shown verbatim.
        "What is the duration period of premiums for CIB & WOPBI?": "The premiums for CIB & WOPB are guaranteed for a period of 5 years and reviewable for subsequent cover periods",
        "What is the maximum maturity age with ROP under the variant?": "The maximum maturity age is 75 years.",
        "What is the maximum maturity age with Whole Life under the variant?": "The maximum maturity age is 99 years.",
    },
    'Future Wealth Gain Brochure': {
        "What is Future Wealth Gain plan?": 'Bajaj Allianz Life Future Wealth Gain is a non-participating, individual, life, unit-linked regular/limited premium payment endowment plan with two variants, “Wealth Plus” and “Wealth Plus Care”. Bajaj Allianz Life Future Wealth Gain plan offers the dual benefit of protection and growth to fulfil the dreams of your loved ones.',
        "If the customer has done a partial withdrawls, is he eligible for the Loyalty Additions/Fund Boosters?": 'Yes. Policyholder will receive Loyalty Addition/ Fund Boosters, provided the policy is in-force and all premiums have been paid.',
        "What are the steps to select the plan?": (
            'Plan Working\n'
            'Step 1: Choose from the two variants Wealth Plus and Wealth Plus Care\n'
            'Step 2: Choose the premium you want to pay\n'
            'Step 3: Choose the sum assured multiplier to decide your life cover\n'
            'Step 4: Choose your policy term and premium payment term\n'
            'Step 5: Choose the premium payment frequency\n'
            'Step 6: Choose between the two portfolio strategies\n'
            'Step 7: Choose the riders (optional and with rider charges applicable)\n'
            'Note:\n'
            '\u2022 The variant has to be chosen at the inception of the policy and cannot be changed subsequently.\n'
            '\u2022 Applicable as per the minimum/maximum sum assured criteria. Please refer to the Eligibility Parameters'
        ),
        "What are the maturity benefits available in the wealth plus variant of this plan?": "On the maturity date, you will receive the Regular Premium Fund Value plus Top up Premium Fund Value",
        "How can one revive a discontinued policy?": (
            'Revival\n A policy which has been discontinued due to non-payment of premiums can only be revived subject to following conditions:\n'
            '1. The Insurance Company receives the request for revival from you within 3 years from the date of first unpaid premium provided the policy is not terminated already\n'
            '2. You submit such information and documentation as may be requested by the Insurance Company at your own expense.\n'
            '3. The policy may be revived on the original policy terms & conditions, revised terms & conditions or disallowed revival, based on Board approved underwriting guidelines.\n'
            '4. On revival of the discontinued policy,\n'
            ' \u2022 The policy will be revived restoring the risk cover, Loyalty Addition and Fund Booster.\n'
            ' \u2022 All the due but unpaid premiums will be collected from you without charging any interest or fee.\n'
            ' \u2022 The Discontinuance Value of the policy together with the amount of discontinuance/ surrender charge (without any interest) as deducted by the Insurance Company on the date of discontinuance of the policy, shall be restored to the chosen fund split into to the applicable Fund/s available as on the date of discontinuance, at their prevailing unit price.\n'
            ' \u2022 The Premium Allocation Charge and Policy Administration Charge, as applicable, during the discontinuance period shall be deducted as applicable from regular premiums paid or from the fund at the time of revival.\n'
            ' \u2022 The Loyalty Additions due-but-not-allotted during the period the Policy was in Discontinuance shall be added to the Regular Premium Fund Value.'
        ),
        # The stray "\ " sequences after the line breaks were removed; they
        # put a literal backslash into the displayed answer.
        'What are the tax benefit options available under this policy?': (
            'Tax Benefit \n'
            'Premium paid, maturity benefit, death benefit and surrender benefit are eligible for tax benefits as per extant Income Tax Act, subject to the provision stated therein and as amended from time to time.\n'
            'You are requested to consult your tax consultant and obtain independent advice for eligibility and before claiming any benefit under the policy.'
        ),
        'What are the features under "Wealth Plus" & "Wealth Plus Care" Variant?': (
            'Bajaj Allianz Life Future Wealth Gain provides you with two unique portfolio strategies, which can be chosen at the inception of your policy or on any subsequent policy anniversary:\n'
            'a) Investor selectable Portfolio Strategy\n'
            'b) Wheel of Life Portfolio Strategy'
        ),
        "Can I switch between the funds?": (
            'Yes, there is an option to switch between funds - only under the Investor Selectable Portfolio Strategy.\n'
            'You have the flexibility to switch units between your investment funds according to your risk appetite and investment decisions, by giving written notice to the Insurance Company.\n'
            '\u2022 You can make unlimited free switches.\n'
            '\u2022 The minimum switching amount is Rs 5,000 or the value of units in the fund to be switched from, whichever is lower.\n'
            '\u2022 The Insurance Company shall effect the switch by redeeming units from the fund to be switched from and allocating new units in the fund being switched to at their respective unit price.'
        ),
    },
}


def dataFromPDF(question):
    """Return the stored reference answer for ``question``.

    Brochures are searched in declaration order and the first brochure that
    contains the exact question text supplies the answer.

    Args:
        question: Question text exactly as offered in the UI; may be ``None``.

    Returns:
        The stored answer, or ``"Answer not found in available data."`` when
        no brochure lists the question.
    """
    for answers in _BROCHURE_ANSWERS.values():
        if question in answers:
            return answers[question]

    return "Answer not found in available data."
requirements.txt ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ aiofiles==23.1.0
3
+ aiohttp==3.8.4
4
+ aiosignal==1.3.1
5
+ altair==4.2.2
6
+ anyio==3.6.2
7
+ astunparse==1.6.3
8
+ async-timeout==4.0.2
9
+ attrs==23.1.0
10
+ cachetools==5.3.0
11
+ certifi==2022.12.7
12
+ charset-normalizer==3.1.0
13
+ click==8.1.3
14
+ colorama==0.4.6
15
+ contourpy==1.0.7
16
+ cycler==0.11.0
17
+ entrypoints==0.4
18
+ et-xmlfile==1.1.0
19
+ fastapi==0.95.1
20
+ ffmpy==0.3.0
21
+ filelock==3.12.0
22
+ flatbuffers==23.3.3
23
+ fonttools==4.39.3
24
+ frozenlist==1.3.3
25
+ fsspec==2023.4.0
26
+ gast==0.4.0
27
+ google-auth==2.17.3
28
+ google-auth-oauthlib==1.0.0
29
+ google-pasta==0.2.0
30
+ gradio==3.27.0
31
+ gradio_client==0.1.3
32
+ grpcio==1.54.0
33
+ h11==0.14.0
34
+ h5py==3.8.0
35
+ httpcore==0.17.0
36
+ httpx==0.24.0
37
+ huggingface-hub==0.14.1
38
+ idna==3.4
39
+ jax==0.4.8
40
+ Jinja2==3.1.2
41
+ joblib==1.2.0
42
+ jsonschema==4.17.3
43
+ keras==2.12.0
44
+ kiwisolver==1.4.4
45
+ libclang==16.0.0
46
+ linkify-it-py==2.0.0
47
+ Markdown==3.4.3
48
+ markdown-it-py==2.2.0
49
+ MarkupSafe==2.1.2
50
+ matplotlib==3.7.1
51
+ mdit-py-plugins==0.3.3
52
+ mdurl==0.1.2
53
+ ml-dtypes==0.1.0
54
+ multidict==6.0.4
55
+ numpy==1.23.5
56
+ oauthlib==3.2.2
57
+ openai==0.10.2
58
+ openpyxl==3.1.2
59
+ opt-einsum==3.3.0
60
+ orjson==3.8.10
61
+ packaging==23.1
62
+ pandas==2.0.1
63
+ pandas-stubs==2.0.0.230412
64
+ Pillow==9.5.0
65
+ protobuf==4.22.3
66
+ pyasn1==0.5.0
67
+ pyasn1-modules==0.3.0
68
+ pydantic==1.10.7
69
+ pydub==0.25.1
70
+ PyMuPDF==1.22.2
71
+ pyparsing==3.0.9
72
+ pyrsistent==0.19.3
73
+ python-dateutil==2.8.2
74
+ python-multipart==0.0.6
75
+ pytz==2023.3
76
+ PyYAML==6.0
77
+ requests==2.29.0
78
+ requests-oauthlib==1.3.1
79
+ rsa==4.9
80
+ scikit-learn==1.2.2
81
+ scipy==1.10.1
82
+ semantic-version==2.10.0
83
+ six==1.16.0
84
+ sniffio==1.3.0
85
+ starlette==0.26.1
86
+ tensorboard==2.12.2
87
+ tensorboard-data-server==0.7.0
88
+ tensorboard-plugin-wit==1.8.1
89
+ tensorflow==2.12.0
90
+ tensorflow-estimator==2.12.0
91
+ tensorflow-hub==0.13.0
92
+ tensorflow-intel==2.12.0
93
+ tensorflow-io-gcs-filesystem==0.31.0
94
+ termcolor==2.3.0
95
+ threadpoolctl==3.1.0
96
+ toolz==0.12.0
97
+ tqdm==4.65.0
98
+ types-pytz==2023.3.0.0
99
+ typing_extensions==4.5.0
100
+ tzdata==2023.3
101
+ uc-micro-py==1.0.1
102
+ urllib3==1.26.15
103
+ uvicorn==0.21.1
104
+ websockets==11.0.2
105
+ Werkzeug==2.3.0
106
+ wrapt==1.14.1
107
+ yarl==1.9.2