Spaces:

ka1kuk
/

LLM-api

Running

App Files Community

ka1kuk committed on Mar 16

Commit

7c2f128

•

1 Parent(s): 62f3d3a

Update apis/chat_api.py

Browse files

Files changed (1) hide show

apis/chat_api.py +14 -26

apis/chat_api.py CHANGED Viewed

@@ -187,42 +187,30 @@ class ChatAPIApp:
             data_response = streamer.chat_return_dict(stream_response)
             return data_response
-    async def chat_embedding(self, input, model_name, api_key: str = Depends(extract_api_key)):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
-        response = requests.post(api_url, headers=headers, json={"inputs": input})
         result = response.json()
         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
-            # Assuming each embedding is a list of lists of floats, flatten it
-            flattened_embeddings = [sum(embedding, []) for embedding in result]
-            return flattened_embeddings
         elif "error" in result:
             raise RuntimeError("The model is currently loading, please re-run the query.")
         else:
             raise RuntimeError("Unexpected response format.")
     async def embedding(self, request: QueryRequest, api_key: str = Depends(extract_api_key)):
-            try:
-                for attempt in range(3):  # Retry logic
-                    try:
-                        embeddings = await self.chat_embedding(request.input, request.model, api_key)
-                        data = [
-                            {"object": "embedding", "index": i, "embedding": embedding}
-                            for i, embedding in enumerate(embeddings)
-                        ]
-                        return {
-                            "object": "list",
-                            "data": data,
-                            "model": request.model,
-                            "usage": {"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
-                        }
-                    except RuntimeError as e:
-                        if attempt < 2:  # Don't sleep on the last attempt
-                            await asyncio.sleep(10)  # Delay for the retry
-                raise HTTPException(status_code=503, detail="The model is currently loading, please try again later.")
-            except Exception as e:
-                raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]:

             data_response = streamer.chat_return_dict(stream_response)
             return data_response
+    async def chat_embedding(self, input_text: str, model_name: str, api_key: str):
         api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_name}"
         headers = {"Authorization": f"Bearer {api_key}"}
+        response = requests.post(api_url, headers=headers, json={"inputs": input_text})
         result = response.json()
         if isinstance(result, list) and len(result) > 0 and isinstance(result[0], list):
+            return [item for sublist in result for item in sublist]  # Flatten the list of lists
         elif "error" in result:
             raise RuntimeError("The model is currently loading, please re-run the query.")
         else:
             raise RuntimeError("Unexpected response format.")
     async def embedding(self, request: QueryRequest, api_key: str = Depends(extract_api_key)):
+        try:
+            embeddings = await self.chat_embedding(request.input, request.model, api_key)
+            data = [{"object": "embedding", "index": i, "embedding": embedding} for i, embedding in enumerate(embeddings)]
+            return EmbeddingResponse(
+                object="list",
+                data=data,
+                model=request.model,
+                usage={"prompt_tokens": len(request.input), "total_tokens": len(request.input)}
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
     def setup_routes(self):
         for prefix in ["", "/v1", "/api", "/api/v1"]: