# goliath-chatbot / app.py
import requests
#openai
openai_api_key = "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"
#azure
azure_api_key = "c6d9cc1f487640cc92800d8d177f5f59"
azure_api_base = "https://openai-619.openai.azure.com/" # your endpoint should look like the following https://YOUR_RESOURCE_NAME.openai.azure.com/
azure_api_type = 'azure'
azure_api_version = '2022-12-01' # this may change in the future
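# safer alternative (sketch, not used below): read the secrets from the
# environment instead of hardcoding them in the source
# import os
# openai_api_key = os.environ.get('OPENAI_API_KEY', '')
# azure_api_key = os.environ.get('AZURE_OPENAI_API_KEY', '')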
def gpt3(prompt, model, service, max_tokens=400):
    if service == 'openai':
        if model == 'gpt-3.5-turbo':
            api_endpoint = "https://api.openai.com/v1/chat/completions"
            data = {
                "model": "gpt-3.5-turbo",
                "messages": [{"role": "user", "content": prompt}]
            }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {openai_api_key}"
            }
            response = requests.post(api_endpoint, headers=headers, json=data)
            return response.json()['choices'][0]['message']['content']
        elif model == 'gpt-3':
            api_endpoint = "https://api.openai.com/v1/engines/text-davinci-003/completions"
            data = {
                "prompt": prompt,
                "max_tokens": max_tokens,
                "temperature": 0.5
            }
            headers = {
                "Content-Type": "application/json",
                "Authorization": f"Bearer {openai_api_key}"
            }
            response = requests.post(api_endpoint, headers=headers, json=data)
            return response.json()["choices"][0]["text"]
    elif service == 'azure':
        if model == 'gpt-3':
            azure_deployment_name = 'gpt3'
            api_endpoint = f"{azure_api_base}openai/deployments/{azure_deployment_name}/completions?api-version={azure_api_version}"
            headers = {
                "Content-Type": "application/json",
                "api-key": azure_api_key
            }
            data = {
                "prompt": prompt,
                "max_tokens": max_tokens
            }
            response = requests.post(api_endpoint, headers=headers, json=data)
            generated_text = response.json()["choices"][0]["text"]
            return generated_text
        elif model == 'gpt-3.5-turbo':
            azure_deployment_name = 'gpt-35-turbo' #cannot be creative with the name: it must match the Azure deployment
            headers = {
                "Content-Type": "application/json",
                "api-key": azure_api_key
            }
            json_data = {
                'messages': [
                    {
                        'role': 'user',
                        'content': prompt,
                    },
                ],
            }
            api_endpoint = f"{azure_api_base}openai/deployments/{azure_deployment_name}/chat/completions?api-version=2023-03-15-preview"
            response = requests.post(api_endpoint, headers=headers, json=json_data)
            return response.json()['choices'][0]['message']['content']
#azure is much more sensitive to max_tokens than openai
# gpt3('how are you?', model='gpt-3.5-turbo', service='azure')
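# Illustrative sketch (not part of the original app): the calls above index
# straight into response.json(), so a failed HTTP request surfaces as a
# KeyError. A thin wrapper like this hypothetical gpt3_safe makes failures
# explicit instead of crashing the chat loop.
def gpt3_safe(prompt, model, service, max_tokens=400):
    try:
        return gpt3(prompt, model=model, service=service, max_tokens=max_tokens)
    except (KeyError, requests.exceptions.RequestException) as error:
        return f'request failed: {error}'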
def text2vec(input, service):
    if service == 'openai':
        api_endpoint = 'https://api.openai.com/v1/embeddings'
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {openai_api_key}',
        }
        json_data = {
            'input': input,
            'model': 'text-embedding-ada-002',
        }
    elif service == 'azure':
        azure_deployment_name = 'gpt3_embedding'
        api_endpoint = f"{azure_api_base}openai/deployments/{azure_deployment_name}/embeddings?api-version={azure_api_version}"
        headers = {
            "Content-Type": "application/json",
            "api-key": azure_api_key
        }
        json_data = {
            "input": input
        }
    #shared request for both services (the openai branch previously left this call commented out, which raised a NameError)
    response = requests.post(api_endpoint, headers=headers, json=json_data)
    vec = response.json()['data'][0]['embedding'] #len=1536 #pricing=0.0004
    return vec
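# example (illustrative): embed a query before scoring it against the dataframes
# query_vec = text2vec('What is a recommendation system?', 'azure')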
def list2vec(list1):
    #batch version of text2vec: a single openai call embeds the whole list
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {openai_api_key}',
    }
    json_data = {
        'input': list1,
        'model': 'text-embedding-ada-002',
    }
    response = requests.post('https://api.openai.com/v1/embeddings', headers=headers, json=json_data)
    return [x['embedding'] for x in response.json()['data']]
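# sketch (assumption, not shown in this file): the 'text_vector_' parquet
# columns used below could have been precomputed offline with the batch
# helper above, e.g.
# df['text_vector_'] = list2vec(df['description'].tolist())
# df.to_parquet('df.parquet')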
import os
os.system('pip install openpyxl')
os.system('pip install sentence-transformers==2.2.2')
os.system('pip install torch==1.13.0')
import torch
import pandas as pd
from sentence_transformers import SentenceTransformer, util
#reference filter
def gpt3_reference(last_context, query):
    #the query may refer back to the previous turn, so resolve its pronouns first
    # last_context = 'you are a company'
    # query = """what do you do"""
    prompt = f"""
    context : {last_context}
    query : {query}
    instructions:
    apply a coreference resolution on the query and replace the pronoun with no temperature, no adjectives
    """
    #only if the pronoun is unclear, replace the query pronoun with its reference
    answer = gpt3(prompt, model='gpt-3.5-turbo', service='azure')
    #strip boilerplate the model sometimes prepends
    answer = answer.replace('\n', '')
    answer = answer.replace('Answer:', '')
    answer = answer.replace('answer:', '')
    answer = answer.replace('answer', '')
    answer = answer.strip()
    return answer
# gpt3_reference("you are a company. recommendation systems are expensive", "How much do you charge?")
df = pd.read_parquet('df.parquet')
df_qa = pd.read_parquet('df_qa.parquet')
df_qa_ = df_qa.copy()
df_ = df.copy()
def qa(df_, df_qa_, min_qa_score, min_context_score, verbose, query):
    query_vec = text2vec(query, 'azure')
    query_vec = torch.DoubleTensor(query_vec)
    #first check if there is already a matching question in df_qa
    df_qa_['score'] = df_qa_['text_vector_'].apply(lambda x : float(util.cos_sim(x, query_vec)))
    df_qa_ = df_qa_.sort_values('score', ascending=False)
    if verbose : print(df_qa_[0:5]) #print instead of display: display is IPython-only
    df_qa_ = df_qa_[df_qa_['score'] >= min_qa_score]
    #if we find at least one possible preset answer
    if len(df_qa_) > 0:
        answer = df_qa_[0:1]['answer'].values.tolist()[0]
        return answer
    #then check if we can use the context to answer the question
    df_['score'] = df_['text_vector_'].apply(lambda x : float(util.cos_sim(x, query_vec)))
    df_ = df_.sort_values('score', ascending=False)
    if verbose : print(df_[0:5])
    df_ = df_[df_['score'] >= min_context_score]
    #if we find at least one relevant context passage
    if len(df_) > 0:
        #in case we might decide to merge multiple contexts
        context = ' '.join(df_['description'][0:1].values.tolist())
        prompt = f"""
        context: {context}
        query: {query}
        Answer the query using context. Do not justify the answer.
        """
        answer = gpt3(prompt, model='gpt-3.5-turbo', service='azure')
        return answer
    else:
        return 'impossible to give an answer'
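# example (illustrative): qa(df_, df_qa_, min_qa_score=0.92, min_context_score=0.75, verbose=True, query='How much do you charge?')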
import subprocess
import random
import gradio as gr

def predict(input, history, last_context):
    last_context += 'you are a company'
    #WE CAN PLAY WITH user_input AND bot_answer, as well as history
    user_input = input
    query = gpt3_reference(last_context, user_input)
    bot_answer = qa(
        df_,
        df_qa_,
        min_qa_score=0.92,
        min_context_score=.75,
        verbose=False,
        query=query #use the coreference-resolved query, which was previously computed and then discarded
    )
    response = [(input, bot_answer)]
    history.append(response[0])
    response = history
    last_context = input
    # print('#history', history)
    # print('#response', response)
    return response, history, last_context
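# example (illustrative): predict('What is a recommendation system?', [], '')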
demo = gr.Blocks()
with demo:
    gr.Markdown(
        """
        Chatbot
        """
    )
    state = gr.Variable(value=[]) #beginning
    last_context = gr.Variable(value='') #beginning
    chatbot = gr.Chatbot() #color_map=("#00ff7f", "#00d5ff")
    text = gr.Textbox(
        label="Question",
        value="What is a recommendation system?",
        placeholder="",
        max_lines=1,
    )
    text.submit(predict, [text, state, last_context], [chatbot, state, last_context])
    text.submit(lambda x: "", text, text) #clear the textbox after submitting
    # btn = gr.Button(value="submit")
    # btn.click(chatbot_foo, None, [chatbot, state])
demo.launch(share=False)