import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
import gradio as gr

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_id = "Narrativaai/BioGPT-Large-finetuned-chatdoctor"

# The fine-tuned checkpoint is used with the base BioGPT-Large tokenizer.
tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")

model = AutoModelForCausalLM.from_pretrained(model_id)

# Move the model to the device
model = model.to(device)
model.eval()  # Set the model to evaluation mode
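
# Optional (an assumption, not part of the original script): on a CUDA
# device the weights can be cast to float16 to roughly halve memory use.
# Left commented out to preserve the original full-precision behavior.
# if device.type == "cuda":
#     model = model.half()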

def answer_question(
        prompt,
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=2,
        do_sample=True,
        **kwargs,
):
    """Generate an answer for a fully formatted prompt and return only the
    text that follows the '### Response:' marker."""
    with torch.no_grad():  # Disable gradient calculation
        inputs = tokenizer(prompt, return_tensors="pt")
        # Move the inputs to the device
        inputs = {key: val.to(device) for key, val in inputs.items()}
        input_ids = inputs["input_ids"]
        attention_mask = inputs["attention_mask"]
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            do_sample=do_sample,
            **kwargs,
        )
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s, skip_special_tokens=True)
    # Return only the generated answer; guard against the marker being
    # absent so a malformed output doesn't raise an IndexError.
    parts = output.split("### Response:")
    return parts[-1].strip() if len(parts) > 1 else output.strip()

def gui_interface(prompt):
    # Wrap the user's question in the instruction format the model was
    # fine-tuned on. The template is left-aligned so no stray indentation
    # is fed to the model.
    full_prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
If you are a doctor, please answer the medical questions based on the patient's description.

### Input:
{prompt}

### Response:
"""
    return answer_question(full_prompt)
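
# Quick smoke test (a sketch; the sample question is made up, and calling it
# here would run generation at import time, so it stays commented out):
#   print(gui_interface("I have had a sore throat and a mild fever for two days."))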

iface = gr.Interface(fn=gui_interface, inputs="text", outputs="text")
iface.launch()
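
# Optional: Gradio can expose a temporary public URL (handy on Colab) by
# passing share=True to launch(). This is a deployment choice, not part of
# the original script:
#   iface.launch(share=True)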