New-Place / main.py
oflakne26's picture
Update main.py
2c4669b verified
raw
history blame
No virus
861 Bytes
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI
from pydantic import BaseModel
# Load the model once, at import time, from the local ./model path.
llm = AutoModelForCausalLM.from_pretrained(
    "./model",
    model_type="mistral",
    threads=2,
)
# Request schema for the completion endpoint
class Validation(BaseModel):
    """Body accepted by the POST "/" endpoint.

    Only ``inputs`` is required; every generation setting has a default
    and is forwarded unchanged to the model call.
    """

    # Prompt text handed to the model.
    inputs: str
    # Sampling temperature passed to the model call.
    temperature: float = 0.0
    # Value passed as ``max_new_tokens`` (presumably an upper bound on the
    # number of generated tokens -- confirm against the ctransformers docs).
    max_new_tokens: int = 1048
    # Value passed as ``top_p`` to the model call.
    top_p: float = 0.15
    # Value passed as ``repetition_penalty`` to the model call.
    repetition_penalty: float = 1.0
# FastAPI application instance; the route below is registered on it.
app = FastAPI()
# Completion endpoint
@app.post("/")
async def stream(item: Validation):
    """Run the model on the request's prompt and return its output.

    Args:
        item: Validated request body holding the prompt (``inputs``) and
            the generation settings.

    Returns:
        Whatever the ``llm(...)`` call returns, passed back as-is. No
        ``stream`` argument is given to the model call here.
    """
    # NOTE(review): the model call is synchronous inside an ``async def``
    # handler, so it presumably blocks the event loop while generating --
    # confirm this is intended before changing it.
    generation_settings = {
        "temperature": item.temperature,
        "max_new_tokens": item.max_new_tokens,
        "top_p": item.top_p,
        "repetition_penalty": item.repetition_penalty,
    }
    completion = llm(item.inputs, **generation_settings)
    return completion