zhiweili committed
Commit 7fe0dcd
1 Parent(s): 48c5d15

fix pipeline

app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 
-from app_base import create_demo as create_demo_face
+# from app_base import create_demo as create_demo_face
 from app_haircolor import create_demo as create_demo_haircolor
 
 with gr.Blocks(css="style.css") as demo:
     with gr.Tabs():
-        with gr.Tab(label="Face"):
-            create_demo_face()
+        # with gr.Tab(label="Face"):
+        #     create_demo_face()
         with gr.Tab(label="Hair Color"):
             create_demo_haircolor()
 
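The net effect of this hunk is a single-tab app: the Face tab and its app_base import are commented out rather than deleted, so they can be restored later. A minimal sketch of the resulting layout, with a hypothetical launch guard added for local runs (the commit never shows how `demo` is launched; on a Gradio-SDK Space the platform typically picks up the `demo` object from app.py):

import gradio as gr

from app_haircolor import create_demo as create_demo_haircolor

with gr.Blocks(css="style.css") as demo:
    with gr.Tabs():
        with gr.Tab(label="Hair Color"):
            create_demo_haircolor()

# Hypothetical entry point for running outside Spaces; not part of the commit.
if __name__ == "__main__":
    demo.launch()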
 
app_haircolor.py CHANGED
@@ -13,6 +13,7 @@ from segment_utils import(
 from gfpgan.utils import GFPGANer
 from basicsr.archs.srvgg_arch import SRVGGNetCompact
 from realesrgan.utils import RealESRGANer
+from inversion_run_realvxl_adapter import run as realvxl_run
 
 
 DEFAULT_SRC_PROMPT = "a woman"
@@ -23,7 +24,6 @@ DEFAULT_CATEGORY = "hair"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def create_demo() -> gr.Blocks:
-    from inversion_run_realvxl_adapter import run as realvxl_run
     model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
     model_path = 'realesr-general-x4v3.pth'
     half = True if torch.cuda.is_available() else False
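These hunks hoist the realvxl_run import from the body of create_demo to module scope, so it is resolved once at import time instead of on every call, and any import error surfaces as soon as the Space boots. The surrounding context lines set up the Real-ESRGAN compact model; for reference, a plausible continuation sketched from the public Real-ESRGAN API (the rest of create_demo is not shown in this diff, so the exact arguments are an assumption):

import torch
from basicsr.archs.srvgg_arch import SRVGGNetCompact
from realesrgan.utils import RealESRGANer

model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
model_path = 'realesr-general-x4v3.pth'
half = torch.cuda.is_available()  # fp16 inference only makes sense on GPU

# Assumed wiring: RealESRGANer is the usual wrapper around this compact model;
# the arguments actually used by the app are not visible in the hunk.
upsampler = RealESRGANer(scale=4, model_path=model_path, model=model, tile=0, tile_pad=10, pre_pad=0, half=half)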
pipelines/pipeline_sdxl_adapter_img2img.py CHANGED
@@ -89,6 +89,7 @@ from diffusers.pipelines.stable_diffusion_xl.pipeline_output import (
     StableDiffusionXLPipelineOutput,
 )
 
+
 if is_invisible_watermark_available():
     from diffusers.pipelines.stable_diffusion_xl.watermark import (
         StableDiffusionXLWatermarker,
@@ -123,6 +124,7 @@ EXAMPLE_DOC_STRING = """
 ```
 """
 
+
 def _preprocess_adapter_image(image, height, width):
     if isinstance(image, torch.Tensor):
         return image
@@ -591,6 +593,52 @@ class StableDiffusionXLImg2ImgPipeline(
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
+    def prepare_ip_adapter_image_embeds(
+        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
+    ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
+        if ip_adapter_image_embeds is None:
+            if not isinstance(ip_adapter_image, list):
+                ip_adapter_image = [ip_adapter_image]
+
+            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
+                raise ValueError(
+                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
+                )
+
+            for single_ip_adapter_image, image_proj_layer in zip(
+                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
+            ):
+                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
+                single_image_embeds, single_negative_image_embeds = self.encode_image(
+                    single_ip_adapter_image, device, 1, output_hidden_state
+                )
+
+                image_embeds.append(single_image_embeds[None, :])
+                if do_classifier_free_guidance:
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
+        else:
+            for single_image_embeds in ip_adapter_image_embeds:
+                if do_classifier_free_guidance:
+                    single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
+                    negative_image_embeds.append(single_negative_image_embeds)
+                image_embeds.append(single_image_embeds)
+
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
@@ -704,16 +752,14 @@
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
+        else:
+            t_start = 0
 
-            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start * self.scheduler.order)
-
-            return timesteps, num_inference_steps - t_start
+        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
 
-        else:
-            # Strength is irrelevant if we directly request a timestep to start at;
-            # that is, strength is determined by the denoising_start instead.
+        # Strength is irrelevant if we directly request a timestep to start at;
+        # that is, strength is determined by the denoising_start instead.
+        if denoising_start is not None:
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -721,7 +767,7 @@
                 )
             )
 
-            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -732,12 +778,11 @@
             num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            t_start = len(self.scheduler.timesteps) - num_inference_steps
-            timesteps = self.scheduler.timesteps[t_start:]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start)
+            timesteps = timesteps[-num_inference_steps:]
             return timesteps, num_inference_steps
 
+        return timesteps, num_inference_steps - t_start
+
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
     ):
@@ -1409,6 +1454,7 @@
         target_size = target_size or (height, width)
 
         # 8. Prepare added time ids & embeddings
+        # adapter_input = adapter_input.type(latents.dtype)
         if isinstance(self.adapter, MultiAdapter):
            adapter_state = self.adapter(adapter_input, adapter_conditioning_scale)
            for k, v in enumerate(adapter_state):
@@ -1521,7 +1567,7 @@
             down_intrablock_additional_residuals = [state.clone() for state in adapter_state]
         else:
             down_intrablock_additional_residuals = None
-
+
         noise_pred = self.unet(
             latent_model_input,
             t,
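The largest hunk ports prepare_ip_adapter_image_embeds verbatim from diffusers' StableDiffusionPipeline: it encodes one image per IP-Adapter projection layer (or splits precomputed embeds in two), duplicates the result for num_images_per_prompt, and, under classifier-free guidance, concatenates the negative embeds ahead of the positive ones. A hedged usage sketch follows; `pipe` and `face_image` are illustrative names, and the hunks shown here do not include the __call__ changes that would consume the result:

# Hypothetical call site, mirroring how upstream diffusers pipelines use
# this helper when assembling the __call__ inputs:
image_embeds = pipe.prepare_ip_adapter_image_embeds(
    ip_adapter_image=face_image,       # one image (or a list, one per IP-Adapter)
    ip_adapter_image_embeds=None,      # pass precomputed embeds to skip encoding
    device=pipe.device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,  # negative embeds are concatenated first
)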
 
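The get_timesteps rework flattens the old if/else: t_start is derived from strength only when denoising_start is None (otherwise 0), the schedule is sliced once, and the denoising_start cutoff is then applied to the already-sliced timesteps rather than to self.scheduler.timesteps; the set_begin_index calls of the original are dropped. A standalone sketch of the strength branch's arithmetic with illustrative values, assuming a first-order scheduler (scheduler.order == 1):

num_inference_steps = 50
strength = 0.6

# denoising_start is None: how many of the 50 scheduled steps actually run
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)  # 30
t_start = max(num_inference_steps - init_timestep, 0)                          # 20

# timesteps = scheduler.timesteps[t_start * order:] keeps the last 30 entries,
# so the img2img loop performs num_inference_steps - t_start = 30 steps.
assert num_inference_steps - t_start == 30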