File size: 5,585 Bytes
da59cbe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import io
import os
import textwrap
from typing import Dict, Optional, Tuple

from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw, ImageFont

DEFAULT_FONT_PATH = "ybelkada/fonts"


def download_default_font():
    """
    Fetch the default font file (`Arial.TTF`) from the Hub repository named by
    `DEFAULT_FONT_PATH` and return the local path reported by `hf_hub_download`.
    """
    return hf_hub_download(DEFAULT_FONT_PATH, "Arial.TTF")


def render_text(
    text: str,
    text_size: int = 36,
    text_color: str = "black",
    background_color: str = "white",
    left_padding: int = 5,
    right_padding: int = 5,
    top_padding: int = 5,
    bottom_padding: int = 5,
    font_bytes: Optional[bytes] = None,
    font_path: Optional[str] = None,
) -> Image.Image:
    """
    Render text onto a new RGB image. This script is entirely adapted from the original script that can be found
    here: https://github.com/google-research/pix2struct/blob/main/pix2struct/preprocessing/preprocessing_utils.py

    The text is wrapped with `textwrap` so that no line exceeds 80 characters, measured with the selected font,
    and drawn on a canvas sized to the measured text plus the requested padding.

    Font selection: `font_path` takes precedence when given; otherwise `font_bytes` is used; when both are `None`
    the default font (`Arial.TTF` from the `DEFAULT_FONT_PATH` Hub repository) is downloaded and used.

    Args:
        text (`str`):
            Text to render.
        text_size (`int`, *optional*, defaults to 36):
            Size of the text.
        text_color (`str`, *optional*, defaults to `"black"`):
            Color of the text.
        background_color (`str`, *optional*, defaults to `"white"`):
            Color of the background.
        left_padding (`int`, *optional*, defaults to 5):
            Padding on the left.
        right_padding (`int`, *optional*, defaults to 5):
            Padding on the right.
        top_padding (`int`, *optional*, defaults to 5):
            Padding on the top.
        bottom_padding (`int`, *optional*, defaults to 5):
            Padding on the bottom.
        font_bytes (`bytes`, *optional*):
            Bytes of the font to use. Ignored when `font_path` is also given. If both are `None`, the default
            font will be used.
        font_path (`str`, *optional*):
            Path to the font to use. If `None` and `font_bytes` is `None` too, the default font will be used.

    Returns:
        `PIL.Image.Image`: The rendered text on a `background_color` canvas.
    """
    # Add new lines so that each line is no more than 80 characters.
    wrapper = textwrap.TextWrapper(width=80)
    wrapped_text = "\n".join(wrapper.wrap(text=text))

    if font_path is not None:
        font_source = font_path
    elif font_bytes is not None:
        font_source = io.BytesIO(font_bytes)
    else:
        # Documented fallback: the previous code downloaded the default font
        # and then raised anyway, discarding the download.
        font_source = hf_hub_download(DEFAULT_FONT_PATH, "Arial.TTF")
    font = ImageFont.truetype(font_source, encoding="UTF-8", size=text_size)

    # Use a temporary canvas to determine the width and height in pixels when
    # rendering the text. With the origin at (0, 0) the right/bottom edges of
    # the bounding box are used as the text extent.
    temp_draw = ImageDraw.Draw(Image.new("RGB", (1, 1), background_color))
    _, _, text_width, text_height = temp_draw.textbbox((0, 0), wrapped_text, font)

    # Create the actual image with a bit of padding around the text.
    image_width = text_width + left_padding + right_padding
    image_height = text_height + top_padding + bottom_padding
    image = Image.new("RGB", (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)
    draw.text(
        xy=(left_padding, top_padding), text=wrapped_text, fill=text_color, font=font
    )
    return image


# Adapted from https://github.com/google-research/pix2struct/blob/0e1779af0f4db4b652c1d92b3bbd2550a7399123/pix2struct/preprocessing/preprocessing_utils.py#L87
def render_header(
    image: Image.Image, header: str, bbox: Dict[str, float], font_path: str, **kwargs
) -> Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
    """
    Renders the input text as a header on the input image and updates the bounding box.

    The header is rendered with `render_text`, both the header and the image are resized to a common width
    (the larger of the two), and they are stacked vertically with the header on top.

    Args:
        image (Image.Image):
            The image to render the header on.
        header (str):
            The header text.
        bbox (Dict[str,float]):
            The bounding box in relative position (0-1), with the keys
            "xmin", "ymin", "xmax" and "ymax".
        font_path (str):
            Path to the font file used to render the header. It must exist on disk.
        **kwargs:
            Extra keyword arguments forwarded to `render_text`.

    Returns:
        Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
        The image with the header rendered, the updated bounding box (same keys as `bbox`, relative to the
        new image), and the pixel sizes of the result under the keys "width", "height" (resized input image
        only), "header_height" and "total_height".
    """
    # NOTE(review): `assert` is stripped under `python -O`; kept as-is so the
    # exception type seen by existing callers does not change.
    assert os.path.exists(font_path), f"Font path {font_path} does not exist."
    header_image = render_text(text=header, font_path=font_path, **kwargs)
    new_width = max(header_image.width, image.width)

    # Scale both parts to the common width while keeping their aspect ratios.
    new_height = int(image.height * (new_width / image.width))
    new_header_height = int(header_image.height * (new_width / header_image.width))

    new_image = Image.new("RGB", (new_width, new_height + new_header_height), "white")
    new_image.paste(header_image.resize((new_width, new_header_height)), (0, 0))
    new_image.paste(image.resize((new_width, new_height)), (0, new_header_height))

    new_total_height = new_image.height

    # Horizontal coordinates are relative to the width and therefore unchanged.
    # Vertical coordinates are converted to pixels within the resized image,
    # offset by the header, then re-normalised by the new total height.
    new_bbox = {
        "xmin": bbox["xmin"],
        "ymin": ((bbox["ymin"] * new_height) + new_header_height)
        / new_total_height,  # shift y_min down by the header's relative height
        "xmax": bbox["xmax"],
        "ymax": ((bbox["ymax"] * new_height) + new_header_height)
        / new_total_height,  # shift y_max down by the header's relative height
    }

    return (
        new_image,
        new_bbox,
        {
            "width": new_width,
            "height": new_height,
            "header_height": new_header_height,
            "total_height": new_total_height,
        },
    )