Spaces:

Roboflow
/

HotDogGPT

Running

App Files Files Community

HotDogGPT / app.py

SkalskiP

:tada: initial commit

8274479 11 months ago

raw

history blame contribute delete

No virus

3.65 kB

	import base64

	import cv2
	import gradio as gr
	import numpy as np
	import requests

	# Markdown shown at the top of the demo page (passed to gr.Markdown below).
	MARKDOWN = """
	# HotDogGPT 💬 + 🌭

	HotDogGPT is OpenAI Vision API experiment reproducing the famous
	[Hot Dog, Not Hot Dog](https://www.youtube.com/watch?v=ACmydtFDTGs) app from Silicon
	Valley.

	<p align="center">
	<img width="600" src="https://miro.medium.com/v2/resize:fit:650/1*VrpXE1hE4rO1roK0laOd7g.png" alt="hotdog">
	</p>

	Visit [awesome-openai-vision-api-experiments](https://github.com/roboflow/awesome-openai-vision-api-experiments)
	repository to find more OpenAI Vision API experiments or contribute your own.
	"""
	# Endpoint that prompt_image POSTs the composed payload to.
	API_URL = "https://api.openai.com/v1/chat/completions"
	# Candidate labels; classify_image joins them into the classification prompt.
	CLASSES = ["🌭 Hot Dog", "❌ Not Hot Dog"]


	def preprocess_image(image: np.ndarray) -> np.ndarray:
	    """Mirror the image left-to-right and convert it from RGB to BGR.

	    Args:
	        image: Input array; the colour conversion treats it as RGB.

	    Returns:
	        The horizontally flipped image with its channels in BGR order.
	    """
	    mirrored = np.fliplr(image)
	    bgr_image = cv2.cvtColor(mirrored, cv2.COLOR_RGB2BGR)
	    return bgr_image


	def encode_image_to_base64(image: np.ndarray) -> str:
	    """JPEG-encode an image and return the encoded bytes as base64 text.

	    Args:
	        image: Image array handed to ``cv2.imencode``.

	    Returns:
	        The base64 representation of the JPEG bytes, decoded as UTF-8.

	    Raises:
	        ValueError: If ``cv2.imencode`` reports that encoding failed.
	    """
	    encoded_ok, jpeg_buffer = cv2.imencode('.jpg', image)
	    if encoded_ok:
	        return base64.b64encode(jpeg_buffer).decode('utf-8')
	    raise ValueError("Could not encode image to JPEG format.")


	def compose_payload(
	    image: np.ndarray,
	    prompt: str,
	    *,
	    model: str = "gpt-4-vision-preview",
	    max_tokens: int = 300,
	) -> dict:
	    """Build the JSON body for a single-image chat-completions request.

	    The request carries one user message with two content parts: the text
	    prompt and the image, inlined as a base64 JPEG data URL.

	    Args:
	        image: Image array to embed; encoded via ``encode_image_to_base64``.
	        prompt: Text instruction sent alongside the image.
	        model: Name of the model to query. The default is unchanged for
	            backward compatibility.
	        max_tokens: Upper bound on the length of the generated answer.

	    Returns:
	        A dict ready to be serialised as the JSON request body.

	    Raises:
	        ValueError: Propagated from ``encode_image_to_base64`` when the image
	            cannot be JPEG-encoded.
	    """
	    # NOTE(review): OpenAI's deprecations page lists the preview vision models
	    # as retired - confirm and pass a current vision-capable model if so.
	    base64_image = encode_image_to_base64(image)
	    return {
	        "model": model,
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt},
	                    {
	                        "type": "image_url",
	                        # The image travels inline as a data URL, not a hosted link.
	                        "image_url": {
	                            "url": f"data:image/jpeg;base64,{base64_image}"
	                        },
	                    },
	                ],
	            }
	        ],
	        "max_tokens": max_tokens,
	    }


	def compose_classification_prompt(classes: list) -> str:
	    """Build the prompt asking the model to pick exactly one listed class.

	    Args:
	        classes: Class names; they are joined with ``", "`` in the prompt.

	    Returns:
	        The complete prompt text.
	    """
	    class_listing = ', '.join(classes)
	    sentences = [
	        "What is in the image?",
	        "Return the class of the object in the image.",
	        f"Here are the classes: {class_listing}.",
	        "You can only return one class from that list.",
	    ]
	    return " ".join(sentences)


	def compose_headers(api_key: str) -> dict:
	    """Return the HTTP headers for an authenticated JSON request.

	    Args:
	        api_key: Secret placed in the ``Authorization`` header as a bearer token.

	    Returns:
	        A dict with the ``Content-Type`` and ``Authorization`` headers.
	    """
	    headers = {"Content-Type": "application/json"}
	    headers["Authorization"] = "Bearer {}".format(api_key)
	    return headers


	def prompt_image(
	    api_key: str,
	    image: np.ndarray,
	    prompt: str,
	    timeout: float = 120.0,
	) -> str:
	    """Send the image and prompt to the API and return the model's answer.

	    Args:
	        api_key: Key used to build the ``Authorization`` header.
	        image: Image array embedded in the request payload.
	        prompt: Text instruction sent with the image.
	        timeout: Seconds ``requests`` waits for the server before giving up.

	    Returns:
	        The text content of the first choice in the response.

	    Raises:
	        ValueError: If the response body is not JSON, or if it contains an
	            ``error`` entry (its message is re-raised).
	        requests.exceptions.RequestException: On connection failures or when
	            the timeout elapses.
	    """
	    headers = compose_headers(api_key=api_key)
	    payload = compose_payload(image=image, prompt=prompt)
	    # Without an explicit timeout, requests can block indefinitely on a
	    # stalled connection and freeze the UI callback.
	    http_response = requests.post(
	        url=API_URL, headers=headers, json=payload, timeout=timeout)

	    try:
	        response = http_response.json()
	    except ValueError as error:
	        # Gateways and proxies may answer with HTML or an empty body; report
	        # that clearly instead of leaking a bare decode error.
	        raise ValueError(
	            "API returned a non-JSON response "
	            f"(HTTP {http_response.status_code}).") from error

	    if 'error' in response:
	        raise ValueError(response['error']['message'])
	    return response['choices'][0]['message']['content']


	def classify_image(api_key: str, image: np.ndarray) -> str:
	    """Classify an image as one of ``CLASSES`` using the vision API.

	    Args:
	        api_key: API key entered by the user; must be non-empty.
	        image: Image array from the UI. It is presumably ``None`` when nothing
	            was uploaded - confirm against Gradio's Image component.

	    Returns:
	        The model's textual answer.

	    Raises:
	        ValueError: If the API key or the image is missing, or if the API
	            call reports an error.
	    """
	    if not api_key:
	        raise ValueError(
	            "API_KEY is not set. "
	            "Please follow the instructions in the README to set it up.")
	    # Fail early with a readable message; otherwise a missing image only
	    # surfaces as an opaque NumPy error inside preprocessing.
	    if image is None:
	        raise ValueError(
	            "Image is not set. Please upload an image before submitting.")

	    image = preprocess_image(image=image)
	    prompt = compose_classification_prompt(classes=CLASSES)
	    return prompt_image(api_key=api_key, image=image, prompt=prompt)


	# UI definition: header, API-key field, and one tab holding the image input,
	# the text output and the submit button.
	# NOTE(review): nesting was reconstructed from a paste that lost indentation;
	# confirm the layout against the deployed app.
	with gr.Blocks() as demo:
	    gr.Markdown(MARKDOWN)
	    api_key_textbox = gr.Textbox(label="🔑 OpenAI API", type="password")

	    with gr.TabItem("Basic"):
	        with gr.Column():
	            input_image = gr.Image(
	                image_mode='RGB',
	                type='numpy',
	                height=500,
	            )
	            output_text = gr.Textbox(label="Output")
	            submit_button = gr.Button("Submit")

	        # Clicking the button runs the classifier on the key and image and
	        # writes the answer to the output box.
	        submit_button.click(
	            fn=classify_image,
	            inputs=[api_key_textbox, input_image],
	            outputs=output_text,
	        )

	# show_error=True surfaces raised exceptions (e.g. the ValueErrors) in the UI.
	demo.launch(debug=False, show_error=True)