Rauhan committed on
Commit
6c7d766
1 Parent(s): 736af94

UPDATE: New Endpoints

Browse files
Files changed (3) hide show
  1. app.py +62 -8
  2. functions.py +2 -6
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,11 +1,13 @@
1
  import io
2
  from functions import *
3
  from PyPDF2 import PdfReader
 
4
  from fastapi import FastAPI, File, UploadFile
5
  from fastapi.middleware.cors import CORSMiddleware
6
  from langchain_community.document_loaders import UnstructuredURLLoader
7
 
8
 
 
9
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
10
  app.add_middleware(
11
  CORSMiddleware,
@@ -29,23 +31,63 @@ async def login(username: str, password: str):
29
 
30
  @app.post("/newChatbot")
31
  async def newChatbot(chatbotName: str, username: str):
 
32
  chatbotName = f"convai-{username}-{chatbotName}"
33
  return createTable(tablename = chatbotName)
34
 
35
 
36
- @app.post("/getRawPDFText")
37
  async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
38
  pdf = await pdf.read()
39
  reader = PdfReader(io.BytesIO(pdf))
40
  text = ""
41
  for page in reader.pages:
42
  text += page.extract_text()
43
- return text
44
-
45
-
46
- @app.post("/addData")
 
 
 
 
 
 
 
 
 
 
47
  async def addText(vectorstore: str, text: str):
48
- return addDocuments(text = text, vectorstore = vectorstore)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  @app.post("/answerQuery")
51
  async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
@@ -54,12 +96,24 @@ async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-7
54
 
55
  @app.post("/deleteChatbot")
56
  async def delete(chatbotName: str):
 
 
57
  return deleteTable(tableName=chatbotName)
58
 
59
  @app.post("/listChatbots")
60
  async def delete(username: str):
61
  return listTables(username=username)
62
 
63
- @app.post("/getWebsiteData")
64
  async def crawlUrl(baseUrl: str):
65
- return getRawWebText(url=baseUrl, timeout=30)
 
 
 
 
 
 
 
 
 
 
 
1
  import io
2
  from functions import *
3
  from PyPDF2 import PdfReader
4
+ import pandas as pd
5
  from fastapi import FastAPI, File, UploadFile
6
  from fastapi.middleware.cors import CORSMiddleware
7
  from langchain_community.document_loaders import UnstructuredURLLoader
8
 
9
 
10
+
11
  app = FastAPI(title = "ConversAI", root_path = "/api/v1")
12
  app.add_middleware(
13
  CORSMiddleware,
 
31
 
32
  @app.post("/newChatbot")
33
  async def newChatbot(chatbotName: str, username: str):
34
+ client.table("ConversAI_ChatbotInfo").insert({"username": username, "chatbotname": chatbotName}).execute()
35
  chatbotName = f"convai-{username}-{chatbotName}"
36
  return createTable(tablename = chatbotName)
37
 
38
 
39
# Upper bound (exclusive) on the total characters one chatbot may store.
CHARACTER_LIMIT = 400000


def _addWithinLimit(vectorstore: str, text: str, limitMessage: str):
    """Add *text* to *vectorstore* if the chatbot stays under its quota.

    Looks up the chatbot's running character count in
    ``ConversAI_ChatbotInfo``, and when ``count + len(text)`` is below
    ``CHARACTER_LIMIT`` stores the new count and forwards the text to
    ``addDocuments``.

    Args:
        vectorstore: Table name in the form ``convai-{username}-{chatbotName}``.
        text: Content to be added.
        limitMessage: Message returned under ``"output"`` when the quota
            would be exceeded.

    Returns:
        The result of ``addDocuments`` on success, otherwise
        ``{"output": limitMessage}``.

    Raises:
        HTTPException: 400 if *vectorstore* is malformed, 404 if no info row
            exists for the chatbot.
    """
    # Local import: HTTPException is not among the file's top-level imports.
    from fastapi import HTTPException

    # Cap the split at two so hyphens inside the chatbot name survive.
    # NOTE(review): a hyphen inside the username is still ambiguous.
    parts = vectorstore.split("-", 2)
    if len(parts) != 3:
        raise HTTPException(
            status_code=400,
            detail="vectorstore must look like 'convai-<username>-<chatbotname>'.",
        )
    _, username, chatbotname = parts

    # Filter on the server instead of downloading the whole table.
    rows = (
        client.table("ConversAI_ChatbotInfo")
        .select("charactercount")
        .eq("username", username)
        .eq("chatbotname", chatbotname)
        .execute()
        .data
    )
    if not rows:
        raise HTTPException(
            status_code=404,
            detail=f"No chatbot registered for vectorstore '{vectorstore}'.",
        )

    # Treat a missing/NULL counter as zero.
    # NOTE(review): assumes the column may be NULL for new rows - confirm schema.
    currentCount = rows[0].get("charactercount") or 0
    newCount = currentCount + len(text)
    if newCount >= CHARACTER_LIMIT:
        return {"output": limitMessage}

    # The payload must be a dict (column -> value); a set literal is not valid.
    (
        client.table("ConversAI_ChatbotInfo")
        .update({"charactercount": newCount})
        .eq("username", username)
        .eq("chatbotname", chatbotname)
        .execute()
    )
    return addDocuments(text=text, vectorstore=vectorstore)


@app.post("/addPDF")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and add it to *vectorstore*.

    The addition is subject to the per-chatbot character quota.
    """
    pdfBytes = await pdf.read()
    reader = PdfReader(io.BytesIO(pdfBytes))
    # Guard against pages that yield no text.
    text = "".join(page.extract_text() or "" for page in reader.pages)
    return _addWithinLimit(
        vectorstore,
        text,
        "DOCUMENT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT.",
    )


@app.post("/addText")
async def addText(vectorstore: str, text: str):
    """Add raw *text* to *vectorstore*, subject to the character quota."""
    return _addWithinLimit(
        vectorstore,
        text,
        # Previously reported "WEBSITE ..." due to a copy-paste slip.
        "TEXT EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT.",
    )


@app.post("/addWebsite")
async def addWebsite(vectorstore: str, websiteUrl: str):
    """Load the pages linked from *websiteUrl* and add them to *vectorstore*.

    Links come from ``getLinks``; each loaded document contributes its
    metadata and page content to one combined text, which is then added
    subject to the character quota.
    """
    urls = getLinks(websiteUrl)
    docs = UnstructuredURLLoader(urls=urls).load()
    text = "\n\n\n\n".join(
        f"Metadata:\n{doc.metadata} \nPage Content:\n {doc.page_content}"
        for doc in docs
    )
    return _addWithinLimit(
        vectorstore,
        text,
        "WEBSITE EXCEEDING LIMITS, PLEASE TRY WITH A SMALLER DOCUMENT.",
    )
91
 
92
  @app.post("/answerQuery")
93
  async def answerQuestion(query: str, vectorstore: str, llmModel: str = "llama3-70b-8192"):
 
96
 
97
  @app.post("/deleteChatbot")
98
  async def delete(chatbotName: str):
99
+ username, chatbotName = chatbotName.split("-")[1], chatbotName.split("-")[2]
100
+ client.table('ConversAI_ChatbotInfo').delete().eq('username', username).eq('chatbotname', chatbotName).execute()
101
  return deleteTable(tableName=chatbotName)
102
 
103
  @app.post("/listChatbots")
104
  async def delete(username: str):
105
  return listTables(username=username)
106
 
107
@app.post("/getLinks")
async def crawlUrl(baseUrl: str):
    """Collect the links found from *baseUrl* and return them under ``"urls"``.

    Delegates to ``getLinks`` with a ``timeout`` of 30.
    """
    discoveredLinks = getLinks(url=baseUrl, timeout=30)
    return {"urls": discoveredLinks}
112
+
113
@app.post("/getCurrentCount")
async def getCount(vectorstore: str):
    """Return the stored character count for the chatbot behind *vectorstore*.

    Args:
        vectorstore: Table name in the form ``convai-{username}-{chatbotName}``.

    Returns:
        ``{"currentCount": <int>}`` read from ``ConversAI_ChatbotInfo``.

    Raises:
        HTTPException: 400 if *vectorstore* is malformed, 404 if no info row
            exists for the chatbot.
    """
    # Local import: HTTPException is not among the file's top-level imports.
    from fastapi import HTTPException

    # Parse the parameter that is actually passed in (``vectorstore``).
    # Cap the split at two so hyphens inside the chatbot name survive.
    # NOTE(review): a hyphen inside the username is still ambiguous.
    parts = vectorstore.split("-", 2)
    if len(parts) != 3:
        raise HTTPException(
            status_code=400,
            detail="vectorstore must look like 'convai-<username>-<chatbotname>'.",
        )
    _, username, chatbotName = parts

    # Filter on the server; this also yields a plain Python value that
    # serialises to JSON without a numpy scalar in the way.
    rows = (
        client.table("ConversAI_ChatbotInfo")
        .select("charactercount")
        .eq("username", username)
        .eq("chatbotname", chatbotName)
        .execute()
        .data
    )
    if not rows:
        raise HTTPException(
            status_code=404,
            detail=f"No chatbot registered for vectorstore '{vectorstore}'.",
        )

    # NOTE(review): assumes a NULL counter should read as zero - confirm schema.
    return {"currentCount": rows[0].get("charactercount") or 0}
functions.py CHANGED
@@ -258,7 +258,7 @@ def listTables(username: str):
258
  }
259
 
260
 
261
- def getRawWebText(url: str, timeout = 30):
262
  start = time.time()
263
  def getLinksFromPage(url: str) -> list:
264
  response = requests.get(url)
@@ -286,8 +286,4 @@ def getRawWebText(url: str, timeout = 30):
286
  break
287
  else:
288
  uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
289
- allLinks = {}
290
- foundLinks = list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
291
- for link in foundLinks:
292
- allLinks[link] = BeautifulSoup(requests.get(link).text, "lxml").body.get_text(" ", strip = True)
293
- return allLinks
 
258
  }
259
 
260
 
261
+ def getLinks(url: str, timeout = 30):
262
  start = time.time()
263
  def getLinksFromPage(url: str) -> list:
264
  response = requests.get(url)
 
286
  break
287
  else:
288
  uniqueLinks = uniqueLinks.union(set(getLinksFromPage(link)))
289
+ return list(set([x[:len(x) - 1] if x[-1] == "/" else x for x in uniqueLinks]))
 
 
 
 
requirements.txt CHANGED
@@ -13,6 +13,7 @@ langchain-groq
13
  lxml
14
  PyPDF2
15
  python-dotenv
 
16
  sentence-transformers
17
  supabase
18
  unstructured
 
13
  lxml
14
  PyPDF2
15
  python-dotenv
16
+ pandas
17
  sentence-transformers
18
  supabase
19
  unstructured