Manjushri committed on
Commit 3feebcb
1 Parent(s): 94832fe

Update app.py

Files changed (1):
  app.py +92 -307
app.py CHANGED
@@ -3,317 +3,102 @@ import torch
  import numpy as np
  import modin.pandas as pd
  from PIL import Image
- from diffusers import DiffusionPipeline, StableDiffusionLatentUpscalePipeline

  device = 'cuda' if torch.cuda.is_available() else 'cpu'
- torch.cuda.max_memory_allocated(device=device)
- torch.cuda.empty_cache()

- def genie (Model, Prompt, negative_prompt, height, width, scale, steps, seed, refine, high_noise_frac, upscale):
-     generator = np.random.seed(0) if seed == 0 else torch.manual_seed(seed)
-
-     if Model == "PhotoReal":
-         pipe = DiffusionPipeline.from_pretrained("circulus/canvers-real-v3.8.1", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("circulus/canvers-real-v3.8.1")
-         pipe.enable_xformers_memory_efficient_attention()
-         pipe = pipe.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             refiner.enable_xformers_memory_efficient_attention()
-             refiner = refiner.to(device)
-             torch.cuda.empty_cache()
-             int_image = pipe(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images
-             image = refiner(Prompt, negative_prompt=negative_prompt, image=int_image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-             if upscale == "Yes":
-                 refiner = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 refiner.enable_xformers_memory_efficient_attention()
-                 refiner = refiner.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = refiner(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = pipe(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = pipe(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "Anime":
-         anime = DiffusionPipeline.from_pretrained("circulus/canvers-anime-v3.8.1", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("circulus/canvers-anime-v3.8.1")
-         anime.enable_xformers_memory_efficient_attention()
-         anime = anime.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             refiner.enable_xformers_memory_efficient_attention()
-             refiner = refiner.to(device)
-             torch.cuda.empty_cache()
-             int_image = anime(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images
-             image = refiner(Prompt, negative_prompt=negative_prompt, image=int_image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-             if upscale == "Yes":
-                 refiner = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 refiner.enable_xformers_memory_efficient_attention()
-                 refiner = refiner.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = refiner(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = anime(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = anime(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "Disney":
-         disney = DiffusionPipeline.from_pretrained("circulus/canvers-disney-v3.8.1", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("circulus/canvers-disney-v3.8.1")
-         disney.enable_xformers_memory_efficient_attention()
-         disney = disney.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             refiner.enable_xformers_memory_efficient_attention()
-             refiner = refiner.to(device)
-             torch.cuda.empty_cache()
-             int_image = disney(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images
-             image = refiner(Prompt, negative_prompt=negative_prompt, image=int_image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-
-             if upscale == "Yes":
-                 refiner = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 refiner.enable_xformers_memory_efficient_attention()
-                 refiner = refiner.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = refiner(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = disney(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = disney(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "StoryBook":
-         story = DiffusionPipeline.from_pretrained("circulus/canvers-story-v3.8.1", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("circulus/canvers-story-v3.8.1")
-         story.enable_xformers_memory_efficient_attention()
-         story = story.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             refiner.enable_xformers_memory_efficient_attention()
-             refiner = refiner.to(device)
-             torch.cuda.empty_cache()
-             int_image = story(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images
-             image = refiner(Prompt, negative_prompt=negative_prompt, image=int_image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-
-             if upscale == "Yes":
-                 refiner = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 refiner.enable_xformers_memory_efficient_attention()
-                 refiner = refiner.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = refiner(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = story(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = story(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "SemiReal":
-         semi = DiffusionPipeline.from_pretrained("circulus/canvers-semi-v3.8.1", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("circulus/canvers-semi-v3.8.1")
-         semi.enable_xformers_memory_efficient_attention()
-         semi = semi.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             refiner = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             refiner.enable_xformers_memory_efficient_attention()
-             refiner = refiner.to(device)
-             torch.cuda.empty_cache()
-             image = semi(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images
-             image = refiner(Prompt, negative_prompt=negative_prompt, image=image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-
-             if upscale == "Yes":
-                 refiner = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 refiner.enable_xformers_memory_efficient_attention()
-                 refiner = refiner.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = refiner(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = semi(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = semi(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "Animagine XL 3.0":
-         animagine = DiffusionPipeline.from_pretrained("cagliostrolab/animagine-xl-3.0", torch_dtype=torch.float16, safety_checker=None) if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("cagliostrolab/animagine-xl-3.0")
-         animagine.enable_xformers_memory_efficient_attention()
-         animagine = animagine.to(device)
-         torch.cuda.empty_cache()
-         if refine == "Yes":
-             torch.cuda.empty_cache()
-             torch.cuda.max_memory_allocated(device=device)
-             int_image = animagine(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale, output_type="latent").images
-             torch.cuda.empty_cache()
-             animagine = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             animagine.enable_xformers_memory_efficient_attention()
-             animagine = animagine.to(device)
-             torch.cuda.empty_cache()
-             image = animagine(Prompt, negative_prompt=negative_prompt, image=int_image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-
-             if upscale == "Yes":
-                 animagine = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 animagine.enable_xformers_memory_efficient_attention()
-                 animagine = animagine.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = animagine(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return image
-         else:
-             if upscale == "Yes":
-                 image = animagine(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = animagine(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-                 return image
-
-     if Model == "SDXL 1.0":
-         torch.cuda.empty_cache()
-         torch.cuda.max_memory_allocated(device=device)
-         sdxl = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)
-         sdxl.enable_xformers_memory_efficient_attention()
-         sdxl = sdxl.to(device)
-         torch.cuda.empty_cache()
-
-         if refine == "Yes":
-             torch.cuda.max_memory_allocated(device=device)
-             torch.cuda.empty_cache()
-             image = sdxl(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale, output_type="latent").images
-             torch.cuda.empty_cache()
-             sdxl = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0", use_safetensors=True, torch_dtype=torch.float16, variant="fp16") if torch.cuda.is_available() else DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-refiner-1.0")
-             sdxl.enable_xformers_memory_efficient_attention()
-             sdxl = sdxl.to(device)
-             torch.cuda.empty_cache()
-             refined = sdxl(Prompt, negative_prompt=negative_prompt, image=image, denoising_start=high_noise_frac).images[0]
-             torch.cuda.empty_cache()
-
-             if upscale == "Yes":
-                 sdxl = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 sdxl.enable_xformers_memory_efficient_attention()
-                 sdxl = sdxl.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = sdxl(prompt=Prompt, negative_prompt=negative_prompt, image=refined, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 return refined
-         else:
-             if upscale == "Yes":
-                 image = sdxl(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-
-                 upscaler = DiffusionPipeline.from_pretrained("stabilityai/sd-x2-latent-upscaler", torch_dtype=torch.float16, use_safetensors=True)
-                 upscaler.enable_xformers_memory_efficient_attention()
-                 upscaler = upscaler.to(device)
-                 torch.cuda.empty_cache()
-                 upscaled = upscaler(prompt=Prompt, negative_prompt=negative_prompt, image=image, num_inference_steps=15, guidance_scale=0).images[0]
-                 torch.cuda.empty_cache()
-                 return upscaled
-             else:
-                 image = sdxl(Prompt, negative_prompt=negative_prompt, height=height, width=width, num_inference_steps=steps, guidance_scale=scale).images[0]
-                 torch.cuda.empty_cache()
-
-                 return image
-
- gr.Interface(fn=genie, inputs=[gr.Radio(['PhotoReal', 'Anime', 'Disney', 'StoryBook', 'SemiReal', 'Animagine XL 3.0', 'SDXL 1.0'], value='PhotoReal', label='Choose Model'),
-     gr.Textbox(label='What you want the AI to generate. 77 Token Limit.'),
-     gr.Textbox(label='What you Do Not want the AI to generate. 77 Token Limit'),
-     gr.Slider(512, 1024, 768, step=128, label='Height'),
-     gr.Slider(512, 1024, 768, step=128, label='Width'),
-     gr.Slider(1, maximum=15, value=5, step=.25, label='Guidance Scale'),
-     gr.Slider(25, maximum=100, value=50, step=25, label='Number of Iterations'),
-     gr.Slider(minimum=0, step=1, maximum=9999999999999999, randomize=True, label='Seed: 0 is Random'),
-     gr.Radio(["Yes", "No"], label='SDXL 1.0 Refiner: Use if the Image has too much Noise', value='No'),
-     gr.Slider(minimum=.9, maximum=.99, value=.95, step=.01, label='Refiner Denoise Start %'),
-     gr.Radio(["Yes", "No"], label='SD X2 Latent Upscaler?', value="No")],
-     outputs=gr.Image(label='Generated Image'),
-     title="Manju Dream Booth V1.7 with SDXL 1.0 Refiner and SD X2 Latent Upscaler - GPU",
-     description="<br><br><b/>Warning: This Demo is capable of producing NSFW content.",
-     article="If You Enjoyed this Demo and would like to Donate, you can send any amount to any of these Wallets. <br><br>BTC: bc1qzdm9j73mj8ucwwtsjx4x4ylyfvr6kp7svzjn84 <br>BTC2: 3LWRoKYx6bCLnUrKEdnPo3FCSPQUSFDjFP <br>DOGE: DK6LRc4gfefdCTRk9xPD239N31jh9GjKez <br>SHIB (BEP20): 0xbE8f2f3B71DFEB84E5F7E3aae1909d60658aB891 <br>PayPal: https://www.paypal.me/ManjushriBodhisattva <br>ETH: 0xbE8f2f3B71DFEB84E5F7E3aae1909d60658aB891 <br><br>Code Monkey: <a href=\"https://huggingface.co/Manjushri\">Manjushri</a>").launch(debug=True, max_threads=80)
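Note: the deleted genie() ran the same load, optional refine, optional 2x-upscale sequence once per checkpoint, six times over. A hypothetical factoring the branches could have shared (load_pipe and MODELS are illustrative names, not part of this commit):

MODELS = {"PhotoReal": "circulus/canvers-real-v3.8.1", "Anime": "circulus/canvers-anime-v3.8.1"}  # etc.

def load_pipe(repo_id):
    # fp16 and no safety checker on CUDA, full precision otherwise, mirroring the deleted branches
    kwargs = {"torch_dtype": torch.float16, "safety_checker": None} if torch.cuda.is_available() else {}
    pipe = DiffusionPipeline.from_pretrained(repo_id, **kwargs)
    pipe.enable_xformers_memory_efficient_attention()
    return pipe.to(device)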
 
  import numpy as np
  import modin.pandas as pd
  from PIL import Image
+ from diffusers import DiffusionPipeline
+ from diffusers.utils import export_to_video  # missing in the original commit; required by the export_to_video() call in sample()
+ from huggingface_hub import login
+ import os
+ from glob import glob
+ from pathlib import Path
+ from typing import Optional
+ import uuid
+ import random

+ token = os.environ['HF_TOKEN']
+ login(token=token)
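Note: os.environ['HF_TOKEN'] raises a bare KeyError when the variable is unset. A guarded variant (a sketch, not in the commit):

token = os.environ.get('HF_TOKEN')
if not token:
    raise RuntimeError('Set HF_TOKEN to a Hugging Face token with access to the SVD 1.1 weights.')
login(token=token)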
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-video-diffusion-img2vid-xt-1-1")
+ # pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+ pipe.enable_xformers_memory_efficient_attention()
+ pipe = pipe.to(device)
+ max_64_bit_int = 2**63 - 1

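Note: the pipeline above is loaded in full precision. On CUDA, half-precision weights roughly halve VRAM; a sketch, assuming the repository ships an fp16 variant (verify before relying on it):

if torch.cuda.is_available():
    pipe = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-video-diffusion-img2vid-xt-1-1",
        torch_dtype=torch.float16, variant="fp16",  # assumed variant name
    )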
+ def sample(
+     image: Image,
+     seed: Optional[int] = 42,
+     randomize_seed: bool = True,
+     motion_bucket_id: int = 127,
+     fps_id: int = 6,
+     version: str = "svd_xt",
+     cond_aug: float = 0.02,
+     decoding_t: int = 3,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
+     device: str = "cpu",
+     output_folder: str = "outputs",
+ ):
+     if image.mode == "RGBA":
+         image = image.convert("RGB")
+
+     if randomize_seed:
+         seed = random.randint(0, max_64_bit_int)
+     generator = torch.manual_seed(seed)
+
+     os.makedirs(output_folder, exist_ok=True)
+     base_count = len(glob(os.path.join(output_folder, "*.mp4")))
+     video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")
+
+     frames = pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=0.1, num_frames=25).frames[0]
+     export_to_video(frames, video_path, fps=fps_id)
+     torch.manual_seed(seed)
+
+     return video_path, seed
+
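Note: decoding_t feeds decode_chunk_size, the number of frames the VAE decodes per batch, which dominates peak VRAM; version, cond_aug, and the device parameter are accepted but never read in the body. A hypothetical direct call outside Gradio:

img = Image.open("input.png")  # hypothetical file
path1, s = sample(img, randomize_seed=True, decoding_t=2)   # lower decoding_t on OOM
path2, _ = sample(img, seed=s, randomize_seed=False, decoding_t=2)  # replays the same seed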
+ def resize_image(image, output_size=(1024, 576)):  # 576, not 578: SVD-XT's native height
+     # Calculate aspect ratios
+     target_aspect = output_size[0] / output_size[1]  # Aspect ratio of the desired size
+     image_aspect = image.width / image.height  # Aspect ratio of the original image
+     # Resize then crop if the original image is larger
+     if image_aspect > target_aspect:
+         # Resize the image to match the target height, maintaining aspect ratio
+         new_height = output_size[1]
+         new_width = int(new_height * image_aspect)
+         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+         # Calculate coordinates for cropping
+         left = (new_width - output_size[0]) / 2
+         top = 0
+         right = (new_width + output_size[0]) / 2
+         bottom = output_size[1]
+     else:
+         # Resize the image to match the target width, maintaining aspect ratio
+         new_width = output_size[0]
+         new_height = int(new_width / image_aspect)
+         resized_image = image.resize((new_width, new_height), Image.LANCZOS)
+         # Calculate coordinates for cropping
+         left = 0
+         top = (new_height - output_size[1]) / 2
+         right = output_size[0]
+         bottom = (new_height + output_size[1]) / 2
+
+     # Crop the image
+     cropped_image = resized_image.crop((left, top, right, bottom))
+     return cropped_image
+
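Note: resize_image() scales and center-crops to the target aspect ratio. A quick sanity check (Pillow only; both orientations land on SVD-XT's native 1024x576):

assert resize_image(Image.new("RGB", (1920, 1080))).size == (1024, 576)
assert resize_image(Image.new("RGB", (1080, 1920))).size == (1024, 576)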
+ with gr.Blocks() as demo:
+     # gr.Markdown('''# Community demo for Stable Video Diffusion - Img2Vid - XT ([model](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt), [paper](https://stability.ai/research/stable-video-diffusion-scaling-latent-video-diffusion-models-to-large-datasets), [stability's ui waitlist](https://stability.ai/contact))
+     # #### Research release ([_non-commercial_](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/blob/main/LICENSE)): generate `4s` vid from a single image at (`25 frames` at `6 fps`). this demo uses [🧨 diffusers for low VRAM and fast generation](https://huggingface.co/docs/diffusers/main/en/using-diffusers/svd).
+     # ''')
+     with gr.Row():
+         with gr.Column():
+             image = gr.Image(label="Upload your image", type="pil")
+             generate_btn = gr.Button("Generate")
+         video = gr.Video()
+     with gr.Accordion("Advanced options", open=False):
+         seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
+         randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
+         motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
+         fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
+
+     image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
+     generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id], outputs=[video, seed], api_name="video")
+
+ if __name__ == "__main__":
+     demo.queue(max_size=20, api_open=False)
+     demo.launch(share=True, show_api=False)
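Note: uuid is imported but never used, and the sequential glob-based numbering in sample() can reuse a name if earlier outputs are deleted. A collision-free alternative (a sketch, not in the commit):

video_path = os.path.join(output_folder, f"{uuid.uuid4().hex}.mp4")  # unique regardless of existing files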