File size: 861 Bytes
5dab16f
e3d6348
5dab16f
 
e4cef2a
fb017cb
e4cef2a
2c4669b
5dab16f
e4cef2a
e3d6348
 
 
 
 
 
e4cef2a
 
5dab16f
4e06fbd
e3d6348
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import asyncio
import threading

from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI
from pydantic import BaseModel

# Load the model once, at import time, from the local "./model" path.
# The resulting object is the one the request handler below calls.
llm = AutoModelForCausalLM.from_pretrained(
    "./model",
    model_type="mistral",
    threads=2,
)

# Request body schema for the completion endpoint. Only `inputs` is
# required; every other field falls back to the default shown here.
class Validation(BaseModel):
    # Prompt text; handed to the model as its positional argument.
    inputs: str
    # Generation settings. Each one is forwarded to the model call as the
    # keyword argument of the same name.
    temperature: float = 0.0
    max_new_tokens: int = 1048
    top_p: float = 0.15
    repetition_penalty: float = 1.0

# FastAPI application instance; the route below is registered on it
# through the `@app.post` decorator.
app = FastAPI()

# Generate LLM completion
#
# A single model object is shared by all requests. Generation is run on a
# worker thread so the event loop stays responsive, and the lock keeps those
# worker threads from entering the model at the same time.
# NOTE(review): the model is not known to be safe for concurrent calls, so
# access is serialized; drop the lock only if that is confirmed.
_generation_lock = threading.Lock()


def _generate(item: Validation):
    """Run one blocking completion for *item* while holding the model lock."""
    with _generation_lock:
        return llm(
            item.inputs,
            temperature=item.temperature,
            max_new_tokens=item.max_new_tokens,
            top_p=item.top_p,
            repetition_penalty=item.repetition_penalty,
        )


@app.post("/")
async def stream(item: Validation):
    """Return the model's completion for ``item.inputs``.

    The generation settings carried by the request body are forwarded to
    the model unchanged. No streaming option is passed to the model call,
    so the result is returned in a single response.
    """
    # The model call blocks until generation finishes. Running it directly
    # inside this coroutine would stall the event loop for every other
    # connection, so it is handed to the loop's default executor instead.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, _generate, item)