Kowsher committed on
Commit 6ebccb5
1 Parent(s): 2972eba

Create handler.py

Files changed (1)
  1. handler.py +47 -0
handler.py ADDED
@@ -0,0 +1,47 @@
+ import torch
+ from typing import Any, Dict, List
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+
+ class EndpointHandler:
+     def __init__(self, path=""):
+         # load the tokenizer and model from the repository path
+         self.tokenizer = AutoTokenizer.from_pretrained(path)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
+         )
+         self.device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, str]]:
+         # unpack the request payload
+         inputs = data.pop("inputs", data)
+         parameters = data.pop("parameters", None)
+
+         # preprocess: tokenize and move the tensors to the model device
+         print("inputs......", inputs)
+         inputs = self.tokenizer(inputs, return_tensors="pt").to(self.device)
+         print("inputs......", inputs)
+
+         # rewrite token_type_ids: toggle the segment id whenever one of the
+         # model-specific sentinel token pairs (39, 5584) or (39, 13359)
+         # appears in the input sequence
+         t = 0
+         input_ids = inputs["input_ids"][0]
+         for j in range(len(input_ids)):
+             # guard the one-token lookahead at the end of the sequence
+             if j + 1 < len(input_ids):
+                 if input_ids[j] == 39 and input_ids[j + 1] == 5584:
+                     t = 0
+                 if input_ids[j] == 39 and input_ids[j + 1] == 13359:
+                     t = 1
+             inputs["token_type_ids"][0][j] = t
+
+         # generate, forwarding any request parameters as generation kwargs
+         if parameters is not None:
+             outputs = self.model.generate(**inputs, **parameters)
+         else:
+             outputs = self.model.generate(**inputs)
+
+         # postprocess: decode the generated sequence
+         prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return [{"generated_text": prediction}]
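
For reference, a minimal sketch of how the new handler could be exercised locally before deployment. Inference Endpoints pass the request body to __call__ as a dict of the form {"inputs": ..., "parameters": ...}; the repository path, prompt, and generation settings below are placeholders for illustration, not part of this commit.

# local smoke test for handler.py (illustrative only)
from handler import EndpointHandler

# assumes the model and tokenizer files sit in the current directory
handler = EndpointHandler(path=".")

payload = {
    "inputs": "Once upon a time",
    "parameters": {"max_new_tokens": 50},  # standard generate() kwarg
}

result = handler(payload)
print(result)  # -> [{"generated_text": "..."}]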