|
import threading

from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
|
|
|
|
|
# Load the model found under ./model once, at import time, so that every
# request handled by this module reuses the same instance.
llm = AutoModelForCausalLM.from_pretrained(
    "./model",
    model_type='mistral',
    threads=2,
)
|
|
|
|
|
# Request body accepted by the POST endpoint: the input text plus the
# generation options that the handler forwards to the model call.
class Validation(BaseModel):
    # Text handed to the model as its first argument. It is the only field
    # without a default, so clients must always supply it.
    inputs: str

    # Generation options; each one is passed to the model as a keyword
    # argument of the same name and may be omitted by the client.
    temperature: float = 0.0
    max_new_tokens: int = 1048
    top_p: float = 0.15
    repetition_penalty: float = 1.0
|
|
|
|
|
# FastAPI application instance; the POST route in this module is registered
# on it through the @app.post decorator.
app = FastAPI()
|
|
|
|
|
# Generation is serialized through this lock. The previous handler ran the
# model call inline inside the coroutine, so requests were implicitly
# processed one at a time; the lock keeps that guarantee now that the call
# runs on worker threads.
# NOTE(review): whether the ctransformers model tolerates concurrent calls is
# not visible from this file - drop the lock only after confirming it does.
_generation_lock = threading.Lock()


def _generate(item: Validation):
    """Run one blocking model call for *item* while holding the model lock."""
    with _generation_lock:
        return llm(
            item.inputs,
            temperature=item.temperature,
            max_new_tokens=item.max_new_tokens,
            top_p=item.top_p,
            repetition_penalty=item.repetition_penalty,
        )


@app.post("/")
async def stream(item: Validation):
    """Run the model on the posted input and return its output.

    The model call is blocking, so it is executed in the threadpool instead
    of directly in this coroutine; calling it inline would stall the event
    loop (and every other request) for the whole duration of the generation.

    Despite the handler's name, the result is returned in a single response
    once generation has finished, not sent incrementally.

    Args:
        item: Validated request body carrying the input text and the
            generation options.

    Returns:
        Whatever the model call returns for these arguments (presumably the
        generated text - confirm against the ctransformers documentation).
    """
    return await run_in_threadpool(_generate, item)
|
|