Spaces:

JingyeChen22
/

TextDiffuser

Running on T4

App Files Files Community

JingyeChen22 committed on Jun 15, 2023

Commit

cc859d1

•

1 Parent(s): 9de996f

Update util.py

Browse files

Files changed (1) hide show

util.py +54 -16

util.py CHANGED Viewed

@@ -26,7 +26,7 @@ for index, c in enumerate(alphabet):
-def transform_mask_pil(mask_root):
     """
     This function extracts the mask area and text area from the images.
@@ -37,13 +37,13 @@ def transform_mask_pil(mask_root):
             * The white area is the text area
     """
     img = np.array(mask_root)
-    img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_NEAREST)
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     ret, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY) # pixel value is set to 0 or 255 according to the threshold
     return 1 - (binary.astype(np.float32) / 255)
-def transform_mask(mask_root: str):
     """
     This function extracts the mask area and text area from the images.
@@ -54,7 +54,7 @@ def transform_mask(mask_root: str):
             * The white area is the text area
     """
     img = cv2.imread(mask_root)
-    img = cv2.resize(img, (512, 512), interpolation=cv2.INTER_NEAREST)
     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
     ret, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY) # pixel value is set to 0 or 255 according to the threshold
     return 1 - (binary.astype(np.float32) / 255)
@@ -125,7 +125,45 @@ def filter_segmentation_mask(segmentation_mask: np.array):
-def combine_image(args, sub_output_dir: str, pred_image_list: List, image_pil: Image, character_mask_pil: Image, character_mask_highlight_pil: Image, caption_pil_list: List):
     """
     This function combines all the outputs and useful inputs together.
@@ -143,20 +181,20 @@ def combine_image(args, sub_output_dir: str, pred_image_list: List, image_pil: I
     if size == 1:
         return pred_image_list[0]
     elif size == 2:
-        blank = Image.new('RGB', (512*2, 512), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
-        blank.paste(pred_image_list[1],(512,0))
     elif size == 3:
-        blank = Image.new('RGB', (512*3, 512), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
-        blank.paste(pred_image_list[1],(512,0))
-        blank.paste(pred_image_list[2],(1024,0))
     elif size == 4:
-        blank = Image.new('RGB', (512*2, 512*2), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
-        blank.paste(pred_image_list[1],(512,0))
-        blank.paste(pred_image_list[2],(0,512))
-        blank.paste(pred_image_list[3],(512,512))
     return blank
@@ -303,4 +341,4 @@ def inpainting_merge_image(original_image, mask_image, inpainting_image):
             table.append(0)
     mask_image = mask_image.point(table, "1")
     merged_image = Image.composite(inpainting_image, original_image, mask_image)
-    return merged_image

def transform_mask_pil(mask_root, size=512):
    """
    Convert an in-memory mask image into a square binary float mask.

    Args:
        mask_root: The mask image itself (not a file path). It is passed
            straight to ``np.array``, so it must be something that converts
            to a colour pixel array, e.g. a PIL image.
        size (int): Side length in pixels of the square output. Defaults to
            512, the value that used to be hard-coded here, so callers that
            pass only the image keep working.

    Returns:
        np.ndarray: ``float32`` array of shape ``(size, size)`` that is 0.0
        where the grayscale value is above 250 (near-white pixels) and 1.0
        everywhere else.
    """
    img = np.array(mask_root)
    # Nearest-neighbour resampling only copies existing pixel values, so the
    # resize cannot introduce blended in-between shades into the mask.
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_NEAREST)
    # NOTE(review): a PIL image converts to RGB channel order while
    # COLOR_BGR2GRAY assumes BGR; left unchanged to preserve existing output.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Pixels above 250 become 255, everything else 0; the threshold value
    # returned by cv2.threshold is not needed.
    _, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY)
    return 1 - (binary.astype(np.float32) / 255)
def transform_mask(mask_root: str, size=512):
    """
    Load a mask image from disk and convert it into a square binary float mask.

    Args:
        mask_root (str): Path of the mask image file.
        size (int): Side length in pixels of the square output. Defaults to
            512, the value that used to be hard-coded here, so callers that
            pass only the path keep working.

    Returns:
        np.ndarray: ``float32`` array of shape ``(size, size)`` that is 0.0
        where the grayscale value is above 250 (near-white pixels) and 1.0
        everywhere else.

    Raises:
        FileNotFoundError: If the file at ``mask_root`` is missing or cannot
            be decoded as an image.
    """
    img = cv2.imread(mask_root)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than with an opaque error in resize.
    if img is None:
        raise FileNotFoundError(f"Mask image is missing or unreadable: {mask_root!r}")
    # Nearest-neighbour resampling only copies existing pixel values, so the
    # resize cannot introduce blended in-between shades into the mask.
    img = cv2.resize(img, (size, size), interpolation=cv2.INTER_NEAREST)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Pixels above 250 become 255, everything else 0; the threshold value
    # returned by cv2.threshold is not needed.
    _, binary = cv2.threshold(gray, 250, 255, cv2.THRESH_BINARY)
    return 1 - (binary.astype(np.float32) / 255)
def combine_image(
    args,
    resolution,
    sub_output_dir: str,
    pred_image_list: List,
    image_pil: "Image.Image",
    character_mask_pil: "Image.Image",
    character_mask_highlight_pil: "Image.Image",
    caption_pil_list: List,
) -> "Image.Image":
    """
    Tile the predicted images into a single image.

    One image is returned as-is. Two or three images are placed side by side
    in one row, and four images are arranged in a 2x2 grid, filled row by row.

    Args:
        args (argparse.ArgumentParser): The arguments. Not read by this function.
        resolution (int): Width and height in pixels of each grid cell.
        sub_output_dir (str): Not read by this function.
        pred_image_list (List): List of predicted images (1 to 4 items).
        image_pil (Image): The original image. Not read by this function.
        character_mask_pil (Image): The character-level segmentation mask.
            Not read by this function.
        character_mask_highlight_pil (Image): The character-level segmentation
            mask highlighting character regions with green color. Not read by
            this function.
        caption_pil_list (List): List of captions. Not read by this function.

    Returns:
        Image: The only predicted image when there is exactly one, otherwise
        a new black-backed RGB canvas holding all of them.

    Raises:
        ValueError: If ``pred_image_list`` does not contain 1 to 4 images.
    """
    count = len(pred_image_list)
    if count == 1:
        return pred_image_list[0]

    # (columns, rows) of the grid used for each supported number of images.
    grid_shapes = {2: (2, 1), 3: (3, 1), 4: (2, 2)}
    if count not in grid_shapes:
        # Previously this fell through to `return blank` with `blank` unbound.
        raise ValueError(
            f"combine_image supports 1 to 4 predicted images, got {count}"
        )

    cols, rows = grid_shapes[count]
    blank = Image.new('RGB', (resolution * cols, resolution * rows), (0, 0, 0))
    for index, pred_image in enumerate(pred_image_list):
        # Row-major placement: column from the remainder, row from the quotient.
        offset = ((index % cols) * resolution, (index // cols) * resolution)
        blank.paste(pred_image, offset)
    return blank
+def combine_image_gradio(args, size, sub_output_dir: str, pred_image_list: List, image_pil: Image, character_mask_pil: Image, character_mask_highlight_pil: Image, caption_pil_list: List):
     """
     This function combines all the outputs and useful inputs together.
     if size == 1:
         return pred_image_list[0]
     elif size == 2:
+        blank = Image.new('RGB', (size*2, size), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
+        blank.paste(pred_image_list[1],(size,0))
     elif size == 3:
+        blank = Image.new('RGB', (size*3, size), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
+        blank.paste(pred_image_list[1],(size,0))
+        blank.paste(pred_image_list[2],(size*2,0))
     elif size == 4:
+        blank = Image.new('RGB', (size*2, size*2), (0,0,0))
         blank.paste(pred_image_list[0],(0,0))
+        blank.paste(pred_image_list[1],(size,0))
+        blank.paste(pred_image_list[2],(0,size))
+        blank.paste(pred_image_list[3],(size,size))
     return blank
             table.append(0)
     mask_image = mask_image.point(table, "1")
     merged_image = Image.composite(inpainting_image, original_image, mask_image)
+    return merged_image