Rauhan committed on
Commit
864f700
1 Parent(s): e52ad04

DEBUG: nltk

Browse files
Files changed (2) hide show
  1. app.py +6 -1
  2. functions.py +0 -3
app.py CHANGED
@@ -2,7 +2,7 @@ import io
2
  import tempfile
3
  from ipaddress import ip_address
4
  from typing import Optional
5
-
6
  import jwt
7
  import base64
8
  import json
@@ -21,6 +21,9 @@ from collections import Counter, defaultdict
21
  from datetime import datetime, timedelta
22
  from dateutil.parser import isoparse
23
 
 
 
 
24
  app = FastAPI(title="ConversAI", root_path="/api/v1")
25
 
26
  app.add_middleware(
@@ -263,6 +266,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
263
  "output": text,
264
  "source": source
265
  }
 
266
  dct = json.dumps(dct, indent=1).encode("utf-8")
267
  fileName = createDataSourceName(sourceName=source)
268
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
@@ -271,6 +275,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
271
  .insert({"username": username,
272
  "chatbotName": chatbotName,
273
  "dataSourceName": fileName,
 
274
  "sourceEndpoint": "/loadPDF",
275
  "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
276
  .execute()
 
2
  import tempfile
3
  from ipaddress import ip_address
4
  from typing import Optional
5
+ import nltk
6
  import jwt
7
  import base64
8
  import json
 
21
  from datetime import datetime, timedelta
22
  from dateutil.parser import isoparse
23
 
24
+
25
+ nltk.download("punkt_tab")
26
+
27
  app = FastAPI(title="ConversAI", root_path="/api/v1")
28
 
29
  app.add_middleware(
 
266
  "output": text,
267
  "source": source
268
  }
269
+ numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
270
  dct = json.dumps(dct, indent=1).encode("utf-8")
271
  fileName = createDataSourceName(sourceName=source)
272
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
 
275
  .insert({"username": username,
276
  "chatbotName": chatbotName,
277
  "dataSourceName": fileName,
278
+ "numTokens": numTokens,
279
  "sourceEndpoint": "/loadPDF",
280
  "sourceContentURL": os.path.join(os.environ["SUPABASE_PUBLIC_BASE_URL"], f"{fileName}_data.json")})
281
  .execute()
functions.py CHANGED
@@ -7,7 +7,6 @@ from langchain_qdrant import QdrantVectorStore
7
  from langchain_qdrant import RetrievalMode
8
  from langchain_core.prompts.chat import ChatPromptTemplate
9
  from uuid import uuid4
10
- import nltk
11
  from langchain_core.output_parsers import StrOutputParser
12
  from langchain.retrievers import ParentDocumentRetriever
13
  from langchain_core.runnables.history import RunnableWithMessageHistory
@@ -35,8 +34,6 @@ import base64
35
  import time
36
  import requests
37
 
38
- nltk.download('punkt_tab')
39
-
40
 
41
  load_dotenv("secrets.env")
42
  client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
 
7
  from langchain_qdrant import RetrievalMode
8
  from langchain_core.prompts.chat import ChatPromptTemplate
9
  from uuid import uuid4
 
10
  from langchain_core.output_parsers import StrOutputParser
11
  from langchain.retrievers import ParentDocumentRetriever
12
  from langchain_core.runnables.history import RunnableWithMessageHistory
 
34
  import time
35
  import requests
36
 
 
 
37
 
38
  load_dotenv("secrets.env")
39
  client = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])