"""Gradio demo: scribble-conditioned image generation with Holodayo XL 2.1.

A hand-drawn sketch from the image editor is cleaned up into a binary scribble
map and used as the ControlNet conditioning image for an SDXL pipeline.
"""

import gradio as gr
import numpy as np
from PIL import Image
import random
from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
import cv2
import torch
import spaces


def nms(x, t, s):
    """Thin a soft line map into a binary one via non-maximum suppression.

    Args:
        x: Input map as a NumPy array (converted to float32 internally).
        t: Threshold; surviving pixels strictly above it are set to 255.
        s: Sigma of the Gaussian blur applied before suppression.

    Returns:
        A uint8 array with the same shape as ``x`` containing only 0 and 255.
    """
    x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s)

    # Line-shaped 3x3 kernels: horizontal, vertical and the two diagonals.
    f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8)
    f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8)
    f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8)
    f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8)

    y = np.zeros_like(x)

    # A pixel equal to its own dilation is a local maximum along that kernel's
    # direction; keep its value if that holds for at least one direction.
    for f in [f1, f2, f3, f4]:
        np.putmask(y, cv2.dilate(x, kernel=f) == x, x)

    z = np.zeros_like(y, dtype=np.uint8)
    z[y > t] = 255
    return z


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

controlnet = ControlNetModel.from_pretrained(
    "xinsir/controlnet-scribble-sdxl-1.0",
    torch_dtype=torch.float16
)

vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
    "yodayo-ai/holodayo-xl-2.1",
    controlnet=controlnet,
    vae=vae,
    torch_dtype=torch.float16,
)
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to(device)

MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1216


def _extract_sketch(image):
    """Return the PIL image carried by an ImageEditor value, or None if empty."""
    if isinstance(image, dict):
        # gr.ImageEditor hands over a dict; prefer the flattened drawing and
        # fall back to the bare background.
        for key in ("composite", "background"):
            candidate = image.get(key)
            if candidate is not None:
                return candidate
        return None
    return image


def _fit_to_sdxl(width, height, target_area=1024 * 1024, multiple=8):
    """Scale a size to about ``target_area`` pixels, keeping the aspect ratio.

    Both sides are rounded down to a multiple of ``multiple`` (never below it)
    so the result is a valid SDXL resolution.
    """
    ratio = np.sqrt(float(target_area) / (width * height))
    new_width = max(multiple, int(width * ratio) // multiple * multiple)
    new_height = max(multiple, int(height * ratio) // multiple * multiple)
    return new_width, new_height


def _scribble_to_control_image(image):
    """Turn a sketch into the binary (0/255) scribble map fed to the ControlNet."""
    controlnet_img = np.array(image)
    controlnet_img = nms(controlnet_img, 127, 3)
    controlnet_img = cv2.GaussianBlur(controlnet_img, (0, 0), 3)

    # The binarisation threshold is drawn at random (1%-10% of full scale), so
    # the line thickness varies slightly from call to call.
    random_val = int(round(random.uniform(0.01, 0.10), 2) * 255)
    controlnet_img[controlnet_img > random_val] = 255
    controlnet_img[controlnet_img < 255] = 0
    return Image.fromarray(controlnet_img)


@spaces.GPU
def infer(image, prompt, negative_prompt, seed, randomize_seed, width, height,
          guidance_scale, num_inference_steps) -> Image.Image:
    """Generate an image from a text prompt, guided by a user sketch.

    Args:
        image: Sketch to condition on; either a PIL image or the dict produced
            by ``gr.ImageEditor`` (keys ``composite`` / ``background``).
        prompt: Positive prompt; fixed quality tags are appended to it.
        negative_prompt: Negative prompt, forwarded unchanged.
        seed: Seed for the random generator; ignored if ``randomize_seed``.
        randomize_seed: When true, a fresh seed in ``[0, MAX_SEED]`` is drawn.
        width: Accepted for interface compatibility but not used; the output
            size is derived from the sketch instead.
        height: Same as ``width``.
        guidance_scale: Classifier-free guidance scale.
        num_inference_steps: Number of denoising steps.

    Returns:
        The first image produced by the pipeline.

    Raises:
        gr.Error: If no sketch was provided or the sketch has zero area.
    """
    sketch = _extract_sketch(image)
    if sketch is None:
        raise gr.Error("Please draw or upload a sketch first.")

    src_width, src_height = sketch.size
    if src_width == 0 or src_height == 0:
        raise gr.Error("The sketch is empty.")

    # Normalise to roughly one megapixel while keeping the sketch's aspect
    # ratio; the same size is used for the conditioning image and the output.
    new_width, new_height = _fit_to_sdxl(src_width, src_height)
    sketch = sketch.resize((new_width, new_height))

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    control_image = _scribble_to_control_image(sketch)

    # Sliders may deliver floats; manual_seed and the step count need ints.
    generator = torch.Generator().manual_seed(int(seed))

    output_image = pipe(
        prompt=prompt + ", masterpiece, best quality, very aesthetic, absurdres",
        negative_prompt=negative_prompt,
        image=control_image,
        guidance_scale=guidance_scale,
        num_inference_steps=int(num_inference_steps),
        width=new_width,
        height=new_height,
        generator=generator,
    ).images[0]

    return output_image


css = """
#col-container {
    margin: 0 auto;
    max-width: 520px;
}
"""

with gr.Blocks(css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Text-to-Image Demo using [Holodayo XL 2.1](https://huggingface.co/yodayo-ai/holodayo-xl-2.1)
        """)

        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )

            run_button = gr.Button("Run", scale=0)

        # Sketch canvas; its value is the conditioning input of infer().
        image = gr.ImageEditor(type="pil", image_mode="L", crop_size=(512, 512))

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):

            negative_prompt = gr.Text(
                label="Negative prompt",
                max_lines=1,
                placeholder="Enter a negative prompt",
                value="nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn"
            )

            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )

            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            # NOTE: infer() derives the output size from the sketch, so these
            # two sliders are passed through but have no effect on the result.
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=832,
                )

                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1216,
                )

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=20.0,
                    step=0.1,
                    value=7,
                )

                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=28,
                    step=1,
                    value=28,
                )

    run_button.click(
        fn=infer,
        inputs=[image, prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
        outputs=[result]
    )

demo.queue().launch()