zhiweili committed
Commit 7fe0dcd
1 Parent(s): 48c5d15

fix pipeline

app.py CHANGED
@@ -1,12 +1,12 @@
 import gradio as gr
 
-from app_base import create_demo as create_demo_face
+# from app_base import create_demo as create_demo_face
 from app_haircolor import create_demo as create_demo_haircolor
 
 with gr.Blocks(css="style.css") as demo:
     with gr.Tabs():
-        with gr.Tab(label="Face"):
-            create_demo_face()
+        # with gr.Tab(label="Face"):
+        #     create_demo_face()
         with gr.Tab(label="Hair Color"):
             create_demo_haircolor()
 
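The net effect of this hunk is a single-tab app: the Face tab and its app_base import are commented out rather than deleted, so they can be restored later. A minimal sketch of the resulting layout, with a hypothetical launch guard added for local runs (the commit never shows how `demo` is launched; on a Gradio-SDK Space the platform typically picks up the `demo` object from app.py):

import gradio as gr

from app_haircolor import create_demo as create_demo_haircolor

with gr.Blocks(css="style.css") as demo:
    with gr.Tabs():
        with gr.Tab(label="Hair Color"):
            create_demo_haircolor()

# Hypothetical entry point for running outside Spaces; not part of the commit.
if __name__ == "__main__":
    demo.launch()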
 
app_haircolor.py CHANGED
@@ -13,6 +13,7 @@ from segment_utils import(
 from gfpgan.utils import GFPGANer
 from basicsr.archs.srvgg_arch import SRVGGNetCompact
 from realesrgan.utils import RealESRGANer
+from inversion_run_realvxl_adapter import run as realvxl_run
 
 
 DEFAULT_SRC_PROMPT = "a woman"
@@ -23,7 +24,6 @@ DEFAULT_CATEGORY = "hair"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def create_demo() -> gr.Blocks:
-    from inversion_run_realvxl_adapter import run as realvxl_run
     model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
     model_path = 'realesr-general-x4v3.pth'
     half = True if torch.cuda.is_available() else False
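These hunks hoist the realvxl_run import from the body of create_demo to module scope, so it is resolved once at import time instead of on every call, and any import error surfaces as soon as the Space boots. The surrounding context lines set up the Real-ESRGAN compact model; for reference, a plausible continuation sketched from the public Real-ESRGAN API (the rest of create_demo is not shown in this diff, so the exact arguments are an assumption):

import torch
from basicsr.archs.srvgg_arch import SRVGGNetCompact
from realesrgan.utils import RealESRGANer

model = SRVGGNetCompact(num_in_ch=3, num_out_ch=3, num_feat=64, num_conv=32, upscale=4, act_type='prelu')
model_path = 'realesr-general-x4v3.pth'
half = torch.cuda.is_available()  # fp16 inference only makes sense on GPU

# Assumed wiring: RealESRGANer is the usual wrapper around this compact model;
# the arguments actually used by the app are not visible in the hunk.
upsampler = RealESRGANer(scale=4, model_path=model_path, model=model, tile=0, tile_pad=10, pre_pad=0, half=half)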
pipelines/pipeline_sdxl_adapter_img2img.py CHANGED
@@ -89,6 +89,7 @@ from diffusers.pipelines.stable_diffusion_xl.pipeline_output import (
     StableDiffusionXLPipelineOutput,
 )
 
+
 if is_invisible_watermark_available():
     from diffusers.pipelines.stable_diffusion_xl.watermark import (
         StableDiffusionXLWatermarker,
@@ -123,6 +124,7 @@ EXAMPLE_DOC_STRING = """
 ```
 """
 
+
 def _preprocess_adapter_image(image, height, width):
     if isinstance(image, torch.Tensor):
         return image
@@ -591,6 +593,52 @@ class StableDiffusionXLImg2ImgPipeline(
 
         return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds
 
+    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_ip_adapter_image_embeds
+    def prepare_ip_adapter_image_embeds(
+        self, ip_adapter_image, ip_adapter_image_embeds, device, num_images_per_prompt, do_classifier_free_guidance
+    ):
+        image_embeds = []
+        if do_classifier_free_guidance:
+            negative_image_embeds = []
+        if ip_adapter_image_embeds is None:
+            if not isinstance(ip_adapter_image, list):
+                ip_adapter_image = [ip_adapter_image]
+
+            if len(ip_adapter_image) != len(self.unet.encoder_hid_proj.image_projection_layers):
+                raise ValueError(
+                    f"`ip_adapter_image` must have same length as the number of IP Adapters. Got {len(ip_adapter_image)} images and {len(self.unet.encoder_hid_proj.image_projection_layers)} IP Adapters."
+                )
+
+            for single_ip_adapter_image, image_proj_layer in zip(
+                ip_adapter_image, self.unet.encoder_hid_proj.image_projection_layers
+            ):
+                output_hidden_state = not isinstance(image_proj_layer, ImageProjection)
+                single_image_embeds, single_negative_image_embeds = self.encode_image(
+                    single_ip_adapter_image, device, 1, output_hidden_state
+                )
+
+                image_embeds.append(single_image_embeds[None, :])
+                if do_classifier_free_guidance:
+                    negative_image_embeds.append(single_negative_image_embeds[None, :])
+        else:
+            for single_image_embeds in ip_adapter_image_embeds:
+                if do_classifier_free_guidance:
+                    single_negative_image_embeds, single_image_embeds = single_image_embeds.chunk(2)
+                    negative_image_embeds.append(single_negative_image_embeds)
+                image_embeds.append(single_image_embeds)
+
+        ip_adapter_image_embeds = []
+        for i, single_image_embeds in enumerate(image_embeds):
+            single_image_embeds = torch.cat([single_image_embeds] * num_images_per_prompt, dim=0)
+            if do_classifier_free_guidance:
+                single_negative_image_embeds = torch.cat([negative_image_embeds[i]] * num_images_per_prompt, dim=0)
+                single_image_embeds = torch.cat([single_negative_image_embeds, single_image_embeds], dim=0)
+
+            single_image_embeds = single_image_embeds.to(device=device)
+            ip_adapter_image_embeds.append(single_image_embeds)
+
+        return ip_adapter_image_embeds
+
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_extra_step_kwargs
     def prepare_extra_step_kwargs(self, generator, eta):
         # prepare extra kwargs for the scheduler step, since not all schedulers have the same signature
@@ -704,16 +752,14 @@
         if denoising_start is None:
             init_timestep = min(int(num_inference_steps * strength), num_inference_steps)
             t_start = max(num_inference_steps - init_timestep, 0)
+        else:
+            t_start = 0
 
-            timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start * self.scheduler.order)
-
-            return timesteps, num_inference_steps - t_start
+        timesteps = self.scheduler.timesteps[t_start * self.scheduler.order :]
 
-        else:
-            # Strength is irrelevant if we directly request a timestep to start at;
-            # that is, strength is determined by the denoising_start instead.
+        # Strength is irrelevant if we directly request a timestep to start at;
+        # that is, strength is determined by the denoising_start instead.
+        if denoising_start is not None:
             discrete_timestep_cutoff = int(
                 round(
                     self.scheduler.config.num_train_timesteps
@@ -721,7 +767,7 @@
                 )
             )
 
-            num_inference_steps = (self.scheduler.timesteps < discrete_timestep_cutoff).sum().item()
+            num_inference_steps = (timesteps < discrete_timestep_cutoff).sum().item()
             if self.scheduler.order == 2 and num_inference_steps % 2 == 0:
                 # if the scheduler is a 2nd order scheduler we might have to do +1
                 # because `num_inference_steps` might be even given that every timestep
@@ -732,12 +778,11 @@
             num_inference_steps = num_inference_steps + 1
 
             # because t_n+1 >= t_n, we slice the timesteps starting from the end
-            t_start = len(self.scheduler.timesteps) - num_inference_steps
-            timesteps = self.scheduler.timesteps[t_start:]
-            if hasattr(self.scheduler, "set_begin_index"):
-                self.scheduler.set_begin_index(t_start)
+            timesteps = timesteps[-num_inference_steps:]
             return timesteps, num_inference_steps
 
+        return timesteps, num_inference_steps - t_start
+
     def prepare_latents(
         self, image, timestep, batch_size, num_images_per_prompt, dtype, device, generator=None, add_noise=True
     ):
@@ -1409,6 +1454,7 @@
         target_size = target_size or (height, width)
 
         # 8. Prepare added time ids & embeddings
+        # adapter_input = adapter_input.type(latents.dtype)
         if isinstance(self.adapter, MultiAdapter):
            adapter_state = self.adapter(adapter_input, adapter_conditioning_scale)
            for k, v in enumerate(adapter_state):
@@ -1521,7 +1567,7 @@
             down_intrablock_additional_residuals = [state.clone() for state in adapter_state]
         else:
             down_intrablock_additional_residuals = None
-
+
         noise_pred = self.unet(
             latent_model_input,
             t,
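The largest hunk ports prepare_ip_adapter_image_embeds verbatim from diffusers' StableDiffusionPipeline: it encodes one image per IP-Adapter projection layer (or splits precomputed embeds in two), duplicates the result for num_images_per_prompt, and, under classifier-free guidance, concatenates the negative embeds ahead of the positive ones. A hedged usage sketch follows; `pipe` and `face_image` are illustrative names, and the hunks shown here do not include the __call__ changes that would consume the result:

# Hypothetical call site, mirroring how upstream diffusers pipelines use
# this helper when assembling the __call__ inputs:
image_embeds = pipe.prepare_ip_adapter_image_embeds(
    ip_adapter_image=face_image,       # one image (or a list, one per IP-Adapter)
    ip_adapter_image_embeds=None,      # pass precomputed embeds to skip encoding
    device=pipe.device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=True,  # negative embeds are concatenated first
)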
 
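The get_timesteps rework flattens the old if/else: t_start is derived from strength only when denoising_start is None (otherwise 0), the schedule is sliced once, and the denoising_start cutoff is then applied to the already-sliced timesteps rather than to self.scheduler.timesteps; the set_begin_index calls of the original are dropped. A standalone sketch of the strength branch's arithmetic with illustrative values, assuming a first-order scheduler (scheduler.order == 1):

num_inference_steps = 50
strength = 0.6

# denoising_start is None: how many of the 50 scheduled steps actually run
init_timestep = min(int(num_inference_steps * strength), num_inference_steps)  # 30
t_start = max(num_inference_steps - init_timestep, 0)                          # 20

# timesteps = scheduler.timesteps[t_start * order:] keeps the last 30 entries,
# so the img2img loop performs num_inference_steps - t_start = 30 steps.
assert num_inference_steps - t_start == 30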