omersaidd committed on
Commit
493e200
•
1 Parent(s): c8eee4f

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +13 -13
  2. app.py +166 -0
  3. requirements.txt +3 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: AIDC-AI Ovis1.6-Gemma2-9B
3
- emoji: 👀
4
- colorFrom: gray
5
- colorTo: green
6
- sdk: gradio
7
- sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
- license: apache-2.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: AIDC-AI Ovis1.6-Gemma2-9B
3
+ emoji: 🐨
4
+ colorFrom: gray
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 4.44.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import os
3
+ import re
4
+ import time
5
+ import gradio as gr
6
+ import torch
7
+ from transformers import AutoModelForCausalLM
8
+ from transformers import TextIteratorStreamer
9
+ from threading import Thread
10
+
11
+ model_name = 'AIDC-AI/Ovis1.6-Gemma2-9B'
12
+
13
+ # load model
14
+ model = AutoModelForCausalLM.from_pretrained(model_name,
15
+ torch_dtype=torch.bfloat16,
16
+ multimodal_max_length=8192,
17
+ trust_remote_code=True).to(device='cpu')
18
+ text_tokenizer = model.get_text_tokenizer()
19
+ visual_tokenizer = model.get_visual_tokenizer()
20
+ streamer = TextIteratorStreamer(text_tokenizer, skip_prompt=True, skip_special_tokens=True)
21
+ image_placeholder = '<image>'
22
+ cur_dir = os.path.dirname(os.path.abspath(__file__))
23
+
24
+ def submit_chat(chatbot, text_input):
25
+ response = ''
26
+ chatbot.append((text_input, response))
27
+ return chatbot ,''
28
+
29
+ # @spaces.GPU <-- Remove this line
30
+ def ovis_chat(chatbot, image_input):
31
+ # preprocess inputs
32
+ conversations = []
33
+ response = ""
34
+ text_input = chatbot[-1][0]
35
+ for query, response in chatbot[:-1]:
36
+ conversations.append({
37
+ "from": "human",
38
+ "value": query
39
+ })
40
+ conversations.append({
41
+ "from": "gpt",
42
+ "value": response
43
+ })
44
+ text_input = text_input.replace(image_placeholder, '')
45
+ conversations.append({
46
+ "from": "human",
47
+ "value": text_input
48
+ })
49
+ if image_input is not None:
50
+ conversations[0]["value"] = image_placeholder + '\n' + conversations[0]["value"]
51
+ prompt, input_ids, pixel_values = model.preprocess_inputs(conversations, [image_input])
52
+ attention_mask = torch.ne(input_ids, text_tokenizer.pad_token_id)
53
+ input_ids = input_ids.unsqueeze(0).to(device='cpu')
54
+ attention_mask = attention_mask.unsqueeze(0).to(device='cpu')
55
+ if image_input is None:
56
+ pixel_values = [None]
57
+ else:
58
+ pixel_values = [pixel_values.to(dtype=visual_tokenizer.dtype, device='cpu')]
59
+
60
+ with torch.inference_mode():
61
+ gen_kwargs = dict(
62
+ max_new_tokens=512,
63
+ do_sample=False,
64
+ top_p=None,
65
+ top_k=None,
66
+ temperature=None,
67
+ repetition_penalty=None,
68
+ eos_token_id=model.generation_config.eos_token_id,
69
+ pad_token_id=text_tokenizer.pad_token_id,
70
+ use_cache=True
71
+ )
72
+ response = ""
73
+ thread = Thread(target=model.generate,
74
+ kwargs={"inputs": input_ids,
75
+ "pixel_values": pixel_values,
76
+ "attention_mask": attention_mask,
77
+ "streamer": streamer,
78
+ **gen_kwargs})
79
+ thread.start()
80
+ for new_text in streamer:
81
+ response += new_text
82
+ chatbot[-1][1] = response
83
+ yield chatbot
84
+ thread.join()
85
+ # debug
86
+ print('*'*60)
87
+ print('*'*60)
88
+ print('OVIS_CONV_START')
89
+ for i, (request, answer) in enumerate(chatbot[:-1], 1):
90
+ print(f'Q{i}:\n {request}')
91
+ print(f'A{i}:\n {answer}')
92
+ print('New_Q:\n', text_input)
93
+ print('New_A:\n', response)
94
+ print('OVIS_CONV_END')
95
+
96
+ def clear_chat():
97
+ return [], None, ""
98
+
99
+ with open(f"{cur_dir}/resource/logo.svg", "r", encoding="utf-8") as svg_file:
100
+ svg_content = svg_file.read()
101
+ font_size = "2.5em"
102
+ svg_content = re.sub(r'(<svg[^>]*)(>)', rf'\1 height="{font_size}" style="vertical-align: middle; display: inline-block;"\2', svg_content)
103
+ html = f"""
104
+ <p align="center" style="font-size: {font_size}; line-height: 1;">
105
+ <span style="display: inline-block; vertical-align: middle;">{svg_content}</span>
106
+ <span style="display: inline-block; vertical-align: middle;">{model_name.split('/')[-1]}</span>
107
+ </p>
108
+ <center><font size=3><b>Ovis</b> has been open-sourced on <a href='https://huggingface.co/{model_name}'>😊 Huggingface</a> and <a href='https://github.com/AIDC-AI/Ovis'>🌟 GitHub</a>. If you find Ovis useful, a like❤️ or a star🌟 would be appreciated.</font></center>
109
+ """
110
+
111
+ latex_delimiters_set = [{
112
+ "left": "\\(",
113
+ "right": "\\)",
114
+ "display": False
115
+ }, {
116
+ "left": "\\begin{equation}",
117
+ "right": "\\end{equation}",
118
+ "display": True
119
+ }, {
120
+ "left": "\\begin{align}",
121
+ "right": "\\end{align}",
122
+ "display": True
123
+ }, {
124
+ "left": "\\begin{alignat}",
125
+ "right": "\\end{alignat}",
126
+ "display": True
127
+ }, {
128
+ "left": "\\begin{gather}",
129
+ "right": "\\end{gather}",
130
+ "display": True
131
+ }, {
132
+ "left": "\\begin{CD}",
133
+ "right": "\\end{CD}",
134
+ "display": True
135
+ }, {
136
+ "left": "\\[",
137
+ "right": "\\]",
138
+ "display": True
139
+ }]
140
+
141
+ text_input = gr.Textbox(label="prompt", placeholder="Enter your text here...", lines=1, container=False)
142
+ with gr.Blocks(title=model_name.split('/')[-1]) as demo:
143
+ gr.HTML(html)
144
+ with gr.Row():
145
+ with gr.Column(scale=3):
146
+ image_input = gr.Image(label="image", height=350, type="pil")
147
+ gr.Examples(
148
+ examples=[
149
+ [f"{cur_dir}/examples/case0.png", "Find the area of the shaded region."],
150
+ [f"{cur_dir}/examples/case1.png", "explain this model to me."],
151
+ [f"{cur_dir}/examples/case2.png", "What is net profit margin as a percentage of total revenue?"],
152
+ ],
153
+ inputs=[image_input, text_input]
154
+ )
155
+ with gr.Column(scale=7):
156
+ chatbot = gr.Chatbot(label="Ovis", layout="panel", height=600, show_copy_button=True, latex_delimiters=latex_delimiters_set)
157
+ text_input.render()
158
+ with gr.Row():
159
+ send_btn = gr.Button("Send", variant="primary")
160
+ clear_btn = gr.Button("Clear", variant="secondary")
161
+
162
+ send_click_event = send_btn.click(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
163
+ submit_event = text_input.submit(submit_chat, [chatbot, text_input], [chatbot, text_input]).then(ovis_chat,[chatbot, image_input],chatbot)
164
+ clear_btn.click(clear_chat, outputs=[chatbot, image_input, text_input])
165
+
166
+ demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ numpy==1.24.3
2
+ torch==2.2.0
3
+ transformers==4.44.2