awacke1's picture
Update app.py
e8848c0 verified
raw
history blame
No virus
5.07 kB
import streamlit as st
from openai import OpenAI
import os
import base64
import cv2
from moviepy.editor import VideoFileClip
# Model name used for every chat completion request in this app.
# Models for the GPT-4o project: gpt-4o-2024-05-13 (gpt-4o).
MODEL = "gpt-4o"

# The API key is read from the `gpt4okey` environment variable.
# TODO: switch to project-based keys with limits, and use the org id and key
# to identify the run pool.
API_KEY = os.environ.get("gpt4okey")

# Alternative kept for reference (reads the key from OPENAI_API_KEY instead):
# client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<your OpenAI API key if not set as an env var>"))
client = OpenAI(api_key=API_KEY)
def process_text():
    """Ask the chat model to solve a problem typed into a Streamlit text box.

    Renders a text input. Once it holds a non-empty value, the text is sent
    to the model together with a math-homework system prompt and the reply
    is written to the page prefixed with "Assistant: ".
    """
    user_text = st.text_input("Enter your text:")
    if not user_text:
        # Nothing has been entered yet, so there is nothing to send.
        return

    system_message = {
        "role": "system",
        "content": "You are a helpful assistant. Help me with my math homework!",
    }
    user_message = {
        "role": "user",
        "content": f"Hello! Could you solve {user_text}?",
    }
    completion = client.chat.completions.create(
        model=MODEL,
        messages=[system_message, user_message],
    )

    reply = completion.choices[0].message.content
    st.write("Assistant: " + reply)
def process_image(image_input):
    """Send an uploaded image to the chat model and render its Markdown answer.

    Args:
        image_input: A binary file-like object (for example the value
            returned by ``st.file_uploader``), or a falsy value when nothing
            has been uploaded yet. If the object has a non-empty ``type``
            attribute, it is used as the image's MIME type.

    Returns:
        None. The model's reply is rendered with ``st.markdown``; nothing is
        rendered when ``image_input`` is falsy.
    """
    if not image_input:
        return

    # Label the data URL with the upload's own MIME type instead of always
    # claiming PNG: the uploader in this app also accepts JPEG files.
    # Objects without a usable ``type`` keep the previous "image/png" label.
    mime_type = getattr(image_input, "type", None) or "image/png"
    base64_image = base64.b64encode(image_input.read()).decode("utf-8")

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant that responds in Markdown. Help me with my math homework!",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's the area of the triangle?"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            },
        ],
        temperature=0.0,
    )
    st.markdown(response.choices[0].message.content)
def process_audio(audio_input):
    """Transcribe an uploaded audio file and render a Markdown summary of it.

    Args:
        audio_input: The uploaded audio file object, or a falsy value when
            nothing has been uploaded yet (in which case nothing happens).
    """
    if not audio_input:
        return

    # Step 1: speech-to-text with the Whisper model.
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_input,
    )

    # Step 2: ask the chat model to summarize the transcript.
    system_message = {
        "role": "system",
        "content": "You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown.",
    }
    transcript_part = {
        "type": "text",
        "text": f"The audio transcription is: {transcription.text}",
    }
    user_message = {"role": "user", "content": [transcript_part]}

    response = client.chat.completions.create(
        model=MODEL,
        messages=[system_message, user_message],
        temperature=0,
    )
    summary = response.choices[0].message.content
    st.markdown(summary)
def process_video(video_input):
    """Summarize an uploaded video from sampled frames and its audio transcript.

    Frames and an audio file are produced by ``process_video_frames``; the
    audio is transcribed with the Whisper model, and the frames plus the
    transcript are sent to the chat model. The reply is rendered as Markdown.

    Args:
        video_input: The uploaded video file object, or a falsy value when
            nothing has been uploaded yet (in which case nothing happens).

    Returns:
        None.
    """
    if not video_input:
        return

    base64Frames, audio_path = process_video_frames(video_input)

    # Open the extracted audio in a context manager so the file handle is
    # closed once the transcription request has finished.
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    # One low-detail image part per sampled frame.
    frame_parts = [
        {
            "type": "image_url",
            "image_url": {"url": f"data:image/jpg;base64,{frame}", "detail": "low"},
        }
        for frame in base64Frames
    ]

    response = client.chat.completions.create(
        model=MODEL,
        messages=[
            {
                "role": "system",
                "content": "You are generating a video summary. Create a summary of the provided video and its transcript. Respond in Markdown",
            },
            {
                "role": "user",
                "content": [
                    # NOTE(review): this first item is a bare string rather
                    # than a {"type": "text", ...} part; kept unchanged --
                    # confirm the API accepts it.
                    "These are the frames from the video.",
                    *frame_parts,
                    {"type": "text", "text": f"The audio transcription is: {transcription.text}"},
                ],
            },
        ],
        temperature=0,
    )
    st.markdown(response.choices[0].message.content)
def process_video_frames(video_path, seconds_per_frame=2):
    """Sample frames from a video and extract its audio track to an MP3 file.

    Args:
        video_path: An object with a ``name`` attribute. If it is an
            in-memory buffer (it exposes ``getvalue``, as ``io.BytesIO``
            does), its bytes are first written to a temporary file, because
            OpenCV and moviepy are given a file path here. Otherwise
            ``name`` is used directly as the path of the video file.
        seconds_per_frame: Interval, in seconds of video, between two
            sampled frames.

    Returns:
        A tuple ``(base64Frames, audio_path)``: the sampled frames as
        base64-encoded JPEG strings, and the path of the written MP3 file
        (``video_path.name`` with its extension replaced by ``.mp3``).

    Raises:
        ValueError: If the video has no audio track.
    """
    import tempfile  # local import: only this function needs it

    source_name = video_path.name
    base_video_path, extension = os.path.splitext(source_name)

    # An in-memory upload has a name but no file on disk, so materialize it
    # before handing a path to OpenCV / moviepy.
    temp_video_path = None
    if hasattr(video_path, "getvalue"):
        with tempfile.NamedTemporaryFile(suffix=extension, delete=False) as temp_video:
            temp_video.write(video_path.getvalue())
        temp_video_path = temp_video.name
        local_path = temp_video_path
    else:
        local_path = source_name

    try:
        base64Frames = []
        video = cv2.VideoCapture(local_path)
        try:
            total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = video.get(cv2.CAP_PROP_FPS)
            # Always advance by at least one frame; a step of 0 (fps reported
            # as 0, or a very small seconds_per_frame) would loop forever.
            frames_to_skip = max(1, int(fps * seconds_per_frame))
            curr_frame = 0
            while curr_frame < total_frames - 1:
                video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
                success, frame = video.read()
                if not success:
                    break
                _, buffer = cv2.imencode(".jpg", frame)
                base64Frames.append(base64.b64encode(buffer).decode("utf-8"))
                curr_frame += frames_to_skip
        finally:
            # Release the capture even if decoding or encoding fails.
            video.release()

        audio_path = f"{base_video_path}.mp3"
        clip = VideoFileClip(local_path)
        try:
            if clip.audio is None:
                raise ValueError(f"Video {source_name!r} has no audio track to extract.")
            clip.audio.write_audiofile(audio_path, bitrate="32k")
            clip.audio.close()
        finally:
            clip.close()
    finally:
        # Remove the temporary copy of the upload, if one was created.
        if temp_video_path is not None:
            os.remove(temp_video_path)

    return base64Frames, audio_path
def main():
    """Render the demo page and dispatch to the handler for the chosen mode."""
    st.title("Omni Demo")
    choice = st.selectbox("Select an option", ("Text", "Image", "Audio", "Video"))

    # Text mode draws its own input widget; no file upload is involved.
    if choice == "Text":
        process_text()
        return

    # Upload modes: uploader label, accepted extensions, and handler.
    upload_modes = {
        "Image": ("Upload an image", ["jpg", "jpeg", "png"], process_image),
        "Audio": ("Upload an audio file", ["mp3", "wav"], process_audio),
        "Video": ("Upload a video file", ["mp4"], process_video),
    }
    if choice in upload_modes:
        label, extensions, handler = upload_modes[choice]
        uploaded_file = st.file_uploader(label, type=extensions)
        handler(uploaded_file)


# Run the app only when this file is executed as a script.
if __name__ == "__main__":
    main()