Spaces:
Sleeping
Sleeping
UPDATE: YT Transcripts
Browse files- app.py +17 -1
- functions.py +9 -1
- requirements.txt +1 -0
app.py
CHANGED
@@ -8,6 +8,7 @@ from fastapi.middleware.cors import CORSMiddleware
|
|
8 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
9 |
|
10 |
|
|
|
11 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
12 |
app.add_middleware(
|
13 |
CORSMiddleware,
|
@@ -159,4 +160,19 @@ async def getCount(vectorstore: str):
|
|
159 |
|
160 |
@app.post("/getYoutubeTranscript")
|
161 |
async def getYTTranscript(url: str):
|
162 |
-
return getTranscript(url = url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from langchain_community.document_loaders import UnstructuredURLLoader
|
9 |
|
10 |
|
11 |
+
|
12 |
app = FastAPI(title = "ConversAI", root_path = "/api/v1")
|
13 |
app.add_middleware(
|
14 |
CORSMiddleware,
|
|
|
160 |
|
161 |
@app.post("/getYoutubeTranscript")
|
162 |
async def getYTTranscript(url: str):
|
163 |
+
return getTranscript(url = url)
|
164 |
+
|
165 |
+
|
166 |
+
@app.post("/analyzeData")
async def analyzeAndAnswer(query: str, file: UploadFile = File(...)):
    """Answer a natural-language query about an uploaded spreadsheet or CSV file.

    Args:
        query: The question to ask about the uploaded data.
        file: The uploaded file. Excel workbooks (xls, xlsx, xlsm, xlsb) and
            CSV files are accepted, chosen by the file-name extension.

    Returns:
        Whatever ``analyzeData`` returns for the parsed dataframe, or
        ``{"output": "INVALID FILE TYPE"}`` when the extension is not supported.
    """
    # UploadFile exposes the client-supplied name as `filename`; it has no
    # `name` attribute, and `filename` may be None when the client omits it.
    filename = file.filename or ""
    # Compare case-insensitively so "REPORT.CSV" is treated like "report.csv".
    extension = filename.rsplit(".", 1)[-1].lower() if "." in filename else ""

    if extension in ("xls", "xlsx", "xlsm", "xlsb"):
        reader = pd.read_excel
    elif extension == "csv":
        reader = pd.read_csv
    else:
        return {
            "output": "INVALID FILE TYPE"
        }

    # UploadFile.read() is a coroutine: it must be awaited to obtain the bytes.
    contents = await file.read()
    df = reader(io.BytesIO(contents))
    return analyzeData(query = query, dataframe = df)
|
functions.py
CHANGED
@@ -8,6 +8,7 @@ from langchain_core.output_parsers import StrOutputParser
|
|
8 |
from langchain.retrievers import ParentDocumentRetriever
|
9 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
10 |
from langchain.memory import ChatMessageHistory
|
|
|
11 |
from langchain_core.chat_history import BaseChatMessageHistory
|
12 |
from langchain.storage import InMemoryStore
|
13 |
from langchain_community.document_loaders import YoutubeLoader
|
@@ -298,7 +299,7 @@ def getTextFromImagePDF(pdfBytes):
|
|
298 |
return text
|
299 |
|
300 |
|
301 |
-
def getTranscript(
|
302 |
loader = YoutubeLoader.from_youtube_url(
|
303 |
url, add_video_info=False
|
304 |
)
|
@@ -307,3 +308,10 @@ def getTranscript(url: str):
|
|
307 |
except:
|
308 |
doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
|
309 |
return doc
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from langchain.retrievers import ParentDocumentRetriever
|
9 |
from langchain_core.runnables.history import RunnableWithMessageHistory
|
10 |
from langchain.memory import ChatMessageHistory
|
11 |
+
from pandasai import SmartDataframe
|
12 |
from langchain_core.chat_history import BaseChatMessageHistory
|
13 |
from langchain.storage import InMemoryStore
|
14 |
from langchain_community.document_loaders import YoutubeLoader
|
|
|
299 |
return text
|
300 |
|
301 |
|
302 |
+
def getTranscript(urls: dict[str, str]):
|
303 |
loader = YoutubeLoader.from_youtube_url(
|
304 |
url, add_video_info=False
|
305 |
)
|
|
|
308 |
except:
|
309 |
doc = "ENGLISH TRANSCRIPT UNAVAILABLE"
|
310 |
return doc
|
311 |
+
|
312 |
+
|
313 |
+
def analyzeData(query, dataframe):
    """Run a natural-language query against a dataframe using PandasAI.

    Args:
        query: The question to ask about the data.
        dataframe: The data to analyze; it is wrapped in a ``SmartDataframe``.

    Returns:
        The value returned by ``SmartDataframe.chat`` for ``query``.
    """
    # ChatGroq is a pydantic model, so its fields must be passed by keyword;
    # a positional model name raises TypeError at construction time.
    llm = ChatGroq(model = "gemma2-9b-it")
    smartDf = SmartDataframe(dataframe, config = {"llm": llm, "verbose": False})
    return smartDf.chat(query)
|
requirements.txt
CHANGED
@@ -16,6 +16,7 @@ numpy
|
|
16 |
PyPDF2
|
17 |
python-dotenv
|
18 |
pydantic
|
|
|
19 |
pandas
|
20 |
easyocr
|
21 |
youtube-transcript-api
|
|
|
16 |
PyPDF2
|
17 |
python-dotenv
|
18 |
pydantic
|
19 |
+
pandasai
|
20 |
pandas
|
21 |
easyocr
|
22 |
youtube-transcript-api
|