Rauhan committed on
Commit
706993d
1 Parent(s): c4a2d1f

UPDATE: web crawler

Browse files
Files changed (3) hide show
  1. Dockerfile +1 -1
  2. app.py +1 -1
  3. secrets.env +2 -1
Dockerfile CHANGED
@@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y \
12
  && apt-get clean \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
- RUN pip install -r requirements.txt
16
 
17
  EXPOSE 7860
18
 
 
12
  && apt-get clean \
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
+ RUN pip install --no-cache-dir -r requirements.txt
16
 
17
  EXPOSE 7860
18
 
app.py CHANGED
@@ -50,7 +50,7 @@ async def addText(vectorstore: str, text: str):
50
 
51
  @app.post("/addWebsite")
52
  async def addWebsite(vectorstore: str, websiteUrl: str):
53
- urls = getLinks("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
54
  loader = UnstructuredURLLoader(urls=urls)
55
  docs = loader.load()
56
  text = "\n\n\n\n".join([f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
 
50
 
51
  @app.post("/addWebsite")
52
  async def addWebsite(vectorstore: str, websiteUrl: str):
53
+ urls = getLinks(websiteUrl)
54
  loader = UnstructuredURLLoader(urls=urls)
55
  docs = loader.load()
56
  text = "\n\n\n\n".join([f"Metadata:\n{docs[doc].metadata} \nPage Content:\n {docs[doc].page_content}" for doc in range(len(docs))])
secrets.env CHANGED
@@ -3,4 +3,5 @@ SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZi
3
  GROQ_API_KEY=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
4
  QDRANT_URL=https://baeef19e-8f9f-4b14-b95f-45946d6fe1e6.us-east4-0.gcp.cloud.qdrant.io:6333
5
  QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
6
- COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
 
 
3
  GROQ_API_KEY=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
4
  QDRANT_URL=https://baeef19e-8f9f-4b14-b95f-45946d6fe1e6.us-east4-0.gcp.cloud.qdrant.io:6333
5
  QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
6
+ COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
7
+ NLTK_DATA=/app/nltk_data