comidan committed
Commit
cd6c1e0
1 Parent(s): 925216a

Update app.py

Files changed (1): app.py (+5, -24)
app.py CHANGED
@@ -148,38 +148,19 @@ def gradio_answer(chatbot, chat_state, img_list, num_beams, temperature):
 title = """
 <h1 align="center"><a href="https://github.com/DAMO-NLP-SG/Video-LLaMA"><img src="https://s1.ax1x.com/2023/05/22/p9oQ0FP.jpg" alt="Video-LLaMA" border="0" style="margin: 0 auto; height: 200px;" /></a></h1>
 
-<h1 align="center">Video-LLaMA: An Instruction-tuned Audio-Visual Language Model for Video Understanding</h1>
+<h1><img src="https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/IBM_logo.svg/1000px-IBM_logo.svg.png" alt="IBM" border="0" style="margin: 0 auto; height: 200px;" /></h1>
+
+<h1 align="center">Video-LLaMA-2: An Instruction-tuned Audio-Visual Language Model for Video Understanding</h1>
 
 <h5 align="center"> Introduction: Video-LLaMA is a multi-modal large language model that achieves video-grounded conversations between humans and computers \
 by connecting a language decoder with off-the-shelf unimodal pre-trained models. </h5>
 
-<div style='display:flex; gap: 0.25rem; '>
-<a href='https://github.com/DAMO-NLP-SG/Video-LLaMA'><img src='https://img.shields.io/badge/Github-Code-success'></a>
-<a href='https://huggingface.co/spaces/DAMO-NLP-SG/Video-LLaMA'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
-<a href='https://huggingface.co/DAMO-NLP-SG/Video-LLaMA-Series'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
-<a href='https://modelscope.cn/studios/damo/video-llama/summary'><img src='https://img.shields.io/badge/ModelScope-Demo-blueviolet'></a>
-<a href='https://arxiv.org/abs/2306.02858'><img src='https://img.shields.io/badge/Paper-PDF-red'></a>
-</div>
-
-
-Thank you for using the Video-LLaMA Demo Page! If you have any questions or feedback, feel free to contact us.
-
-If you find Video-LLaMA interesting, please give us a star on GitHub.
-
-Current online demo uses the 7B version of Video-LLaMA due to resource limitations. We have released \
-the 13B version on our GitHub repository.
 
+The current online demo uses the 7B version of Video-LLaMA-2 due to the resource limitations of running on an NVIDIA A10.
 
+From the IBM Generative AI Italy team, who adapted the model for LLAMA-2-7B. For any issues, contact daniele.comi@ibm.com.
 """
 
-Note_markdown = ("""
-### Note
-Video-LLaMA is a prototype model and may have limitations in understanding complex scenes, long videos, or specific domains.
-The output results may be influenced by input quality, limitations of the dataset, and the model's susceptibility to illusions. Please interpret the results with caution.
-
-**Copyright 2023 Alibaba DAMO Academy.**
-""")
-
 cite_markdown = ("""
 ## Citation
 If you find our project useful, hope you can star our repo and cite our paper as follows:
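For orientation, `title` and `cite_markdown` are plain strings that the demo renders as UI text. Below is a minimal sketch of how such strings are typically wired into a Gradio Blocks layout; the real layout code in app.py sits outside this hunk, so the component arrangement shown here is an assumption, not the commit's actual code.

```python
# Hypothetical sketch: rendering the `title` banner and `cite_markdown`
# footer in a Gradio app. Only the strings come from the diff above; the
# layout itself is assumed.
import gradio as gr

title = "<h1 align='center'>Video-LLaMA-2</h1>"  # stands in for the full HTML banner
cite_markdown = "## Citation\nIf you find our project useful, ..."  # stands in for the citation block

with gr.Blocks() as demo:
    gr.Markdown(title)          # gr.Markdown renders embedded HTML, so the banner displays as-is
    chatbot = gr.Chatbot()      # placeholder for the demo's chat interface
    gr.Markdown(cite_markdown)  # citation footer below the chat area

if __name__ == "__main__":
    demo.launch()
```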