Spaces:

comidan
/

video-llama2-test

Paused

App Files Files Community

comidan committed on Oct 9, 2023

Commit

925216a

•

1 Parent(s): dfcbabb

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -9

app.py CHANGED Viewed

@@ -92,19 +92,33 @@ def upload_imgorvideo(gr_video, gr_img, text_input, chat_state,chatbot,audio_fla
     elif gr_img is not None and gr_video is None:
         print(gr_img)
         chatbot = chatbot + [((gr_img,), None)]
-        chat_state.system =  "You are able to understand the visual content that the user provides. Follow the instructions carefully and explain your answers in detail."
         img_list = []
         llm_message = chat.upload_img(gr_img, chat_state, img_list)
         return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
     elif gr_video is not None and gr_img is None:
         print(gr_video)
         chatbot = chatbot + [((gr_video,), None)]
-        chat_state.system =  ""
         img_list = []
-        if audio_flag:
-            llm_message = chat.upload_video(gr_video, chat_state, img_list)
-        else:
-            llm_message = chat.upload_video_without_audio(gr_video, chat_state, img_list)
         return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
     else:
         # img_list = []
@@ -121,15 +135,16 @@ def gradio_ask(user_message, chatbot, chat_state):
 def gradio_answer(chatbot, chat_state, img_list, num_beams, temperature):
     llm_message = chat.answer(conv=chat_state,
                               img_list=img_list,
-                              num_beams=num_beams,
                               temperature=temperature,
-                              max_new_tokens=300,
-                              max_length=2000)[0]
     chatbot[-1][1] = llm_message
     print(chat_state.get_prompt())
     print(chat_state)
     return chatbot, chat_state, img_list
 title = """
 <h1 align="center"><a href="https://github.com/DAMO-NLP-SG/Video-LLaMA"><img src="https://s1.ax1x.com/2023/05/22/p9oQ0FP.jpg", alt="Video-LLaMA" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>

     elif gr_img is not None and gr_video is None:
         print(gr_img)
         chatbot = chatbot + [((gr_img,), None)]
+        chat_state = Conversation(
+            system= "You are able to understand the visual content that the user provides."
+           "Follow the instructions carefully and explain your answers in detail.",
+            roles=("Human", "Assistant"),
+            messages=[],
+            offset=0,
+            sep_style=SeparatorStyle.SINGLE,
+            sep="###",
+        )
         img_list = []
         llm_message = chat.upload_img(gr_img, chat_state, img_list)
         return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
     elif gr_video is not None and gr_img is None:
         print(gr_video)
         chatbot = chatbot + [((gr_video,), None)]
+        chat_state = default_conversation.copy()
+        chat_state = Conversation(
+            system= "You are able to understand the visual content that the user provides."
+           "Follow the instructions carefully and explain your answers in detail.",
+            roles=("Human", "Assistant"),
+            messages=[],
+            offset=0,
+            sep_style=SeparatorStyle.SINGLE,
+            sep="###",
+        )
         img_list = []
+        llm_message = chat.upload_video(gr_video, chat_state, img_list)
         return gr.update(interactive=False), gr.update(interactive=False), gr.update(interactive=True, placeholder='Type and press Enter'), gr.update(value="Start Chatting", interactive=False), chat_state, img_list,chatbot
     else:
         # img_list = []
 def gradio_answer(chatbot, chat_state, img_list, num_beams, temperature):
     llm_message = chat.answer(conv=chat_state,
                               img_list=img_list,
+                              num_beams=1,
                               temperature=temperature,
+                              max_new_tokens=240,
+                              max_length=511)[0]
     chatbot[-1][1] = llm_message
     print(chat_state.get_prompt())
     print(chat_state)
     return chatbot, chat_state, img_list
 title = """
 <h1 align="center"><a href="https://github.com/DAMO-NLP-SG/Video-LLaMA"><img src="https://s1.ax1x.com/2023/05/22/p9oQ0FP.jpg", alt="Video-LLaMA" border="0" style="margin: 0 auto; height: 200px;" /></a> </h1>