BloodRain666's picture
Update app.py
4343090 verified
raw
history blame contribute delete
No virus
1.27 kB
# import gradio as gr
# import os
# gr.load("models/google/gemma-1.1-7b-it", hf_token=os.environ.get("YOUR_API_TOKEN"), streaming=True).launch()
import gradio as gr
import os
# Import the OpenAI client, installing the package on first run only if it
# is missing. The install goes through this interpreter's own pip
# (`sys.executable -m pip`) with an argument list instead of a shell string,
# and `check_call` raises if the install fails rather than letting the
# import below fail with a less obvious error.
try:
    from openai import OpenAI
except ImportError:
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    from openai import OpenAI
# Shared OpenAI client aimed at the Hugging Face Inference API endpoint.
# The API key is read from the YOUR_API_TOKEN environment variable; it is
# None when that variable is not set.
client = OpenAI(
    api_key=os.getenv("YOUR_API_TOKEN"),
    base_url="https://api-inference.huggingface.co/v1",
)
def predict(message, history, test=""):
    """Stream a chat completion for ``message`` given the prior conversation.

    The conversation is converted to the OpenAI chat "messages" list, sent to
    the module-level ``client`` with streaming enabled, and the reply is
    yielded incrementally.

    Args:
        message: The latest user message; appended last with role ``user``.
        history: Prior turns. Each item is either a ``(human, assistant)``
            pair or a dict with ``role`` and ``content`` keys.
        test: Unused; kept so existing callers that pass it keep working.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries new content.
    """
    print("1 ", message)
    print("2 ", history)

    history_openai_format = []
    for turn in history:
        if isinstance(turn, dict):
            # Already a role/content message; forward only those two keys.
            history_openai_format.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue

        human, assistant = turn
        # Either side of a pair may be None (e.g. a turn with no reply yet);
        # skip it instead of sending a message with null content.
        if human is not None:
            history_openai_format.append({"role": "user", "content": human})
        if assistant is not None:
            history_openai_format.append(
                {"role": "assistant", "content": assistant}
            )
    history_openai_format.append({"role": "user", "content": message})

    # Alternative model tried previously: 'nvidia/Llama3-ChatQA-1.5-8B'.
    response = client.chat.completions.create(
        model='meta-llama/Meta-Llama-3-8B-Instruct',
        messages=history_openai_format,
        temperature=0.7,
        stream=True,
        max_tokens=3000,
    )

    partial_message = ""
    for chunk in response:
        # A stream chunk can arrive with an empty `choices` list; indexing
        # [0] on it would raise IndexError and abort the reply mid-stream.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            partial_message += piece
            yield partial_message
# Wrap the streaming `predict` generator in a chat UI and start the server.
gr.ChatInterface(fn=predict).launch()