Rauhan committed
Commit 6a1e988
Parent: d07d38d

UPDATE: base64 encodings

Files changed (2):
  1. app.py: +10 -1
  2. functions.py: +6 -3
app.py CHANGED
@@ -1,6 +1,7 @@
 import io
 import tempfile
 import jwt
+import base64
 from click import option
 from jwt import ExpiredSignatureError, InvalidTokenError
 from starlette import status
@@ -273,8 +274,16 @@ async def returnText(pdf: UploadFile = File(...)):
     }
 
 
+class AddText(BaseModel):
+    vectorstore: str
+    text: str
+    source: str = "Text"
+
+
 @app.post("/addText")
-async def addText(vectorstore: str, text: str, source: str = "Text"):
+async def addText(addTextConfig: AddText):
+    vectorstore, text, source = addTextConfig.vectorstore, addTextConfig.text, addTextConfig.source
+    text = base64.b64decode(text.encode("utf-8")).decode("utf-8")
     username, chatbotname = vectorstore.split("$")[1], vectorstore.split("$")[2]
     df = pd.DataFrame(supabase.table("ConversAI_ChatbotInfo").select("*").execute().data)
     currentCount = df[(df["user_id"] == username) & (df["chatbotname"] == chatbotname)]["charactercount"].iloc[0]
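With this change, /addText takes a JSON body (the new AddText model) instead of query parameters, and the text field is expected to arrive base64-encoded. A minimal client sketch, assuming a deployment at http://localhost:8000 and an illustrative vectorstore name (the handler only relies on the "$"-delimited user and chatbot segments):

import base64
import requests

BASE_URL = "http://localhost:8000"  # assumed host; adjust for your deployment

payload = {
    # "$"-delimited: segment 1 is the user id, segment 2 the chatbot name (illustrative value)
    "vectorstore": "prefix$someuser$somebot",
    # the endpoint base64-decodes this field before using it
    "text": base64.b64encode("Hello, chatbot!".encode("utf-8")).decode("utf-8"),
    "source": "Text",
}

response = requests.post(f"{BASE_URL}/addText", json=payload)
print(response.json())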
functions.py CHANGED
@@ -288,7 +288,7 @@ def getTextFromImagePDF(pdfBytes):
         return "\n".join([text[1] for text in reader.readtext(np.array(image), paragraph=True)])
 
     allImages = convert_from_bytes(pdfBytes)
-    texts = [getText(image) for image in allImages]
+    texts = [base64.b64encode(getText(image).encode("utf-8")).decode("utf-8") for image in allImages]
     return {x + 1: y for x, y in enumerate(texts)}
 
 
@@ -304,6 +304,7 @@ def getTranscript(urls: str):
         except:
             doc = ""
         texts.append(doc)
+    texts = [base64.b64encode(text.encode("utf-8")).decode("utf-8") for text in texts]
     return {x: y for x, y in zip(urls, texts)}
 
 
@@ -321,7 +322,8 @@ def analyzeData(query, dataframe):
 
 
 def extractTextFromPage(page):
-    return page.get_text()
+    text = page.get_text()
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")
 
 
 def extractTextFromPdf(pdf_path):
@@ -338,7 +340,8 @@ def extractTextFromUrl(url):
     response.raise_for_status()
     html = response.text
     soup = BeautifulSoup(html, 'lxml')
-    return soup.get_text(separator=' ', strip=True)
+    text = soup.get_text(separator=' ', strip=True)
+    return base64.b64encode(text.encode("utf-8")).decode("utf-8")
 
 
 def extractTextFromUrlList(urls):
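Every extractor touched here (getTextFromImagePDF, getTranscript, extractTextFromPage, extractTextFromUrl) now returns base64-encoded UTF-8 rather than raw text, so callers must decode on their side. A minimal round-trip sketch of that convention (the helper names below are illustrative, not from the repo):

import base64

def encode_text(text: str) -> str:
    # mirrors what the extractors now do before returning
    return base64.b64encode(text.encode("utf-8")).decode("utf-8")

def decode_text(encoded: str) -> str:
    # what a consumer of these functions must do with each value
    return base64.b64decode(encoded.encode("utf-8")).decode("utf-8")

# e.g. the {page_number: encoded_text} shape returned by getTextFromImagePDF
pages = {1: encode_text("first page text"), 2: encode_text("second page text")}
decoded = {page: decode_text(blob) for page, blob in pages.items()}
assert decoded[1] == "first page text"

Standard base64 round-trips UTF-8 text losslessly, so the assertion above always holds.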