oflakne26 committed
Commit
e53fb7b
1 Parent(s): afb8fad

Update main.py

Files changed (1)
  1. main.py +84 -51
main.py CHANGED
@@ -1,61 +1,94 @@
- import os
- from fastapi import FastAPI, HTTPException, Depends
  from pydantic import BaseModel
- from ctransformers import AutoModelForCausalLM
-
- # Pydantic object for request validation
- class Validation(BaseModel):
-     inputs: str
-     temperature: float = 0.0
-     max_new_tokens: int = 1048
-     top_p: float = 0.15
-     repetition_penalty: float = 1.0
-
- # Initialize FastAPI app
  app = FastAPI()

- # Function to load models and create endpoints
- def setup_endpoints(app):
-     model_base_path = './models'
-     if not os.path.exists(model_base_path) or not os.path.isdir(model_base_path):
-         raise RuntimeError("Models directory does not exist or is not a directory")
-
-     model_dirs = [d for d in os.listdir(model_base_path) if os.path.isdir(os.path.join(model_base_path, d))]
-
-     if not model_dirs:
-         raise RuntimeError("No models found in the models directory")
-
-     models = {}
-
-     # Load each model
-     for model_name in model_dirs:
-         model_path = os.path.join(model_base_path, model_name)
          try:
-             model = AutoModelForCausalLM.from_pretrained(model_path, threads=2)
-             models[model_name] = model
          except Exception as e:
-             print(f"Failed to load model {model_name}: {e}")
-             continue
-
-     # Function to get model dependency
-     def get_model(model_name: str):
-         if model_name not in models:
-             raise HTTPException(status_code=404, detail="Model not found")
-         return models[model_name]
-
-     # Create an endpoint for each model
-     for model_name in model_dirs:
-         @app.post(f"/{model_name}")
-         async def generate_response(item: Validation, model=Depends(lambda: get_model(model_name))):
-             try:
-                 response = model(item.inputs,
-                                  temperature=item.temperature,
-                                  max_new_tokens=item.max_new_tokens,
-                                  top_p=item.top_p,
-                                  repetition_penalty=item.repetition_penalty)
-                 return response
-             except Exception as e:
-                 raise HTTPException(status_code=500, detail=str(e))
-
- # Setup endpoints
- setup_endpoints(app)
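
For reference, the removed version exposed one POST route per directory under ./models, validated by the Validation model above. A call against it would have looked roughly like the sketch below; the URL and the "my-model" route name are placeholders, not values taken from the commit.

import requests

# Hypothetical client call against the old per-model route; "my-model"
# stands in for a directory name found under ./models at startup.
resp = requests.post(
    "http://localhost:8000/my-model",
    json={
        "inputs": "Hello!",
        "temperature": 0.0,
        "max_new_tokens": 1048,
        "top_p": 0.15,
        "repetition_penalty": 1.0,
    },
)
print(resp.json())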
 
+ from fastapi import FastAPI, HTTPException
+ from typing import Any
  from pydantic import BaseModel
+ from os import getenv
+ from huggingface_hub import InferenceClient
+ import random
+ from json_repair import repair_json
+ import nltk

  app = FastAPI()

+ # Download and load the Punkt sentence tokenizer used below.
+ nltk.download('punkt')
+
+ tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
+
+ HF_TOKEN = getenv("HF_TOKEN")
+ MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
+ FALLBACK_MODELS = [
+     "mistralai/Mixtral-8x7B-Instruct-v0.1",
+     "mistralai/Mistral-7B-Instruct-v0.2",
+     "mistralai/Mistral-7B-Instruct-v0.1",
+ ]
+
+ # Pydantic object for request validation
+ class InputData(BaseModel):
+     model: str
+     system_prompt_template: str
+     prompt_template: str
+     system_prompt: str
+     user_input: str
+     json_prompt: str
+     history: str = ""
+
+ @app.post("/generate-response/")
+ async def generate_response(data: InputData) -> Any:
+     # Split the user input into sentences and build the structured payload
+     # the model is asked to mirror in its JSON answer.
+     sentences = tokenizer.tokenize(data.user_input)
+     data_dict = {'###New response###': [], '###Sentence count###': 0}
+     for i, sentence in enumerate(sentences):
+         data_dict["###New response###"].append(sentence)
+         data_dict["###Sentence count###"] = i + 1
+
+     data.history += data.prompt_template.replace("{Prompt}", str(data_dict))

+     inputs = (
+         data.system_prompt_template.replace("{SystemPrompt}", data.system_prompt) +
+         data.system_prompt_template.replace("{SystemPrompt}", data.json_prompt) +
+         data.history)

+     # One seed for the whole request keeps the fallback retries reproducible.
+     seed = random.randint(0, 2**32 - 1)

+     # Try the requested model first, then the hard-coded fallbacks.
+     models_to_try = [data.model] + FALLBACK_MODELS

+     for model in models_to_try:
          try:
+             # Create the client per attempt so the fallback models are actually
+             # queried (a single client would stay pinned to data.model).
+             client = InferenceClient(model=model, token=HF_TOKEN)
+             response = client.text_generation(inputs,
+                                               temperature=1.0,
+                                               max_new_tokens=1000,
+                                               seed=seed)
+
+             strict_response = str(response)
+
+             # Coerce possibly malformed model output back into a JSON object.
+             repaired_response = repair_json(strict_response,
+                                             return_objects=True)
+
+             if isinstance(repaired_response, str):
+                 # repair_json returns a string when no object can be recovered;
+                 # the except below catches this, so the next model is tried.
+                 raise HTTPException(status_code=500, detail="Invalid response from model")
+             else:
+                 # Strip the ### markers from the keys.
+                 cleaned_response = {}
+                 for key, value in repaired_response.items():
+                     cleaned_key = key.replace("###", "")
+                     cleaned_response[cleaned_key] = value
+
+                 # Keep the first sentence of at most the first three entries;
+                 # rebuild the list instead of deleting items mid-iteration.
+                 trimmed = []
+                 for text in cleaned_response["New response"][:3]:
+                     first = tokenizer.tokenize(text)
+                     if first:
+                         trimmed.append(first[0])
+                 cleaned_response["New response"] = trimmed
+
+                 if cleaned_response.get("Sentence count"):
+                     cleaned_response["Sentence count"] = min(cleaned_response["Sentence count"], 3)
+                 else:
+                     cleaned_response["Sentence count"] = len(cleaned_response["New response"])
+
+                 data.history += str(cleaned_response)
+
+                 return cleaned_response
+
          except Exception as e:
+             print(f"Model {model} failed with error: {e}")
+
+     raise HTTPException(status_code=500, detail="All models failed to generate response")
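
A request to the new endpoint bundles the templates, prompts, and history into one JSON body. The sketch below shows one plausible way to call it: every value is illustrative (the template strings are just one common Mistral-style layout, not taken from the commit), and the response shape follows from the key-cleaning and trimming code above.

import requests

# Hypothetical request against the new endpoint; all field values are
# illustrative and the URL depends on where the app is served.
payload = {
    "model": "mistralai/Mistral-7B-Instruct-v0.2",
    "system_prompt_template": "<s>[INST] {SystemPrompt} [/INST]",
    "prompt_template": "[INST] {Prompt} [/INST]",
    "system_prompt": "You are a concise assistant.",
    "user_input": "Tell me about FastAPI. Keep it brief.",
    "json_prompt": "Reply as JSON with keys ###New response### and ###Sentence count###.",
    "history": "",
}

resp = requests.post("http://localhost:8000/generate-response/", json=payload)

# After key cleaning and trimming, a successful response looks like:
# {"New response": ["...", "..."], "Sentence count": 2}
print(resp.json())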