Rauhan committed on
Commit
937bcc4
1 Parent(s): ac9adab

UPDATE: YT Transcripts

Browse files
Files changed (3) hide show
  1. app.py +17 -1
  2. functions.py +9 -1
  3. requirements.txt +1 -0
app.py CHANGED
@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
8
  from langchain_community.document_loaders import UnstructuredURLLoader
9
 
10
 
 
11
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
12
  app.add_middleware(
13
  CORSMiddleware,
@@ -159,4 +160,19 @@ async def getCount(vectorstore: str):
159
 
160
  @app.post("/getYoutubeTranscript")
161
  async def getYTTranscript(url: str):
162
- return getTranscript(url = url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from langchain_community.document_loaders import UnstructuredURLLoader
9
 
10
 
11
+
12
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
13
  app.add_middleware(
14
  CORSMiddleware,
 
160
 
161
  @app.post("/getYoutubeTranscript")
162
  async def getYTTranscript(url: str):
163
+ return getTranscript(url = url)
164
+
165
+
166
+ @app.post("/analyzeData")
167
+ async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
168
+ extension = file.name.split(".")[-1]
169
+ if extension in ["xls", "xlsx", "xlsm", "xlsb"]:
170
+ df = pd.read_excel(io.BytesIO(file.read()))
171
+ return analyzeData(query = query, dataframe = df)
172
+ elif extension == "csv":
173
+ df = pd.read_csv(io.BytesIO(file.read()))
174
+ return analyzeData(query = query, dataframe = df)
175
+ else:
176
+ return {
177
+ "output": "INVALID FILE TYPE"
178
+ }
functions.py CHANGED
@@ -8,6 +8,7 @@ from langchain_core.output_parsers import StrOutputParser
8
  from langchain.retrievers import ParentDocumentRetriever
9
  from langchain_core.runnables.history import RunnableWithMessageHistory
10
  from langchain.memory import ChatMessageHistory
 
11
  from langchain_core.chat_history import BaseChatMessageHistory
12
  from langchain.storage import InMemoryStore
13
  from langchain_community.document_loaders import YoutubeLoader
@@ -298,7 +299,7 @@ def getTextFromImagePDF(pdfBytes):
298
  return text
299
 
300
 
301
- def getTranscript(url: str):
302
  loader = YoutubeLoader.from_youtube_url(
303
  url, add_video_info=False
304
  )
@@ -307,3 +308,10 @@ def getTranscript(url: str):
307
  except:
308
  doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
309
  return doc
 
 
 
 
 
 
 
 
8
  from langchain.retrievers import ParentDocumentRetriever
9
  from langchain_core.runnables.history import RunnableWithMessageHistory
10
  from langchain.memory import ChatMessageHistory
11
+ from pandasai import SmartDataframe
12
  from langchain_core.chat_history import BaseChatMessageHistory
13
  from langchain.storage import InMemoryStore
14
  from langchain_community.document_loaders import YoutubeLoader
 
299
  return text
300
 
301
 
302
+ def getTranscript(urls: dict[str, str]):
303
  loader = YoutubeLoader.from_youtube_url(
304
  url, add_video_info=False
305
  )
 
308
  except:
309
  doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
310
  return doc
311
+
312
+
313
+ def analyzeData(query, dataframe):
314
+ llm = ChatGroq("gemma2-9b-it")
315
+ df = SmartDataframe(dataframe, config = {"llm": llm, "verbose": False})
316
+ response = df.chat(query)
317
+ return response
requirements.txt CHANGED
@@ -16,6 +16,7 @@ numpy
16
  PyPDF2
17
  python-dotenv
18
  pydantic
 
19
  pandas
20
  easyocr
21
  youtube-transcript-api
 
16
  PyPDF2
17
  python-dotenv
18
  pydantic
19
+ pandasai
20
  pandas
21
  easyocr
22
  youtube-transcript-api