# Source: Hugging Face Space "AskUI/pta-text-v0.1" (Space status: Sleeping).
# File size: 5,585 bytes; revision da59cbe.

import io
import os
import textwrap
from typing import Dict, Optional, Tuple

from huggingface_hub import hf_hub_download
from PIL import Image, ImageDraw, ImageFont

# Despite the name, this is not a filesystem path: it is passed as the first
# argument to `hf_hub_download` (together with the file name "Arial.TTF") to
# fetch the default font.
DEFAULT_FONT_PATH = "ybelkada/fonts"


def download_default_font():
    """
    Fetch the default font file ("Arial.TTF") via `hf_hub_download` and return
    whatever that call returns (the location of the downloaded file).
    """
    return hf_hub_download(repo_id=DEFAULT_FONT_PATH, filename="Arial.TTF")


def render_text(
    text: str,
    text_size: int = 36,
    text_color: str = "black",
    background_color: str = "white",
    left_padding: int = 5,
    right_padding: int = 5,
    top_padding: int = 5,
    bottom_padding: int = 5,
    font_bytes: Optional[bytes] = None,
    font_path: Optional[str] = None,
) -> Image.Image:
    """
    Render text onto a new image that is just large enough to hold it plus padding.

    This script is entirely adapted from the original script that can be found here:
    https://github.com/google-research/pix2struct/blob/main/pix2struct/preprocessing/preprocessing_utils.py

    Args:
        text (`str`):
            Text to render. It is re-wrapped so that no line is longer than 80 characters.
        text_size (`int`, *optional*, defaults to 36):
            Size of the text.
        text_color (`str`, *optional*, defaults to `"black"`):
            Color of the text.
        background_color (`str`, *optional*, defaults to `"white"`):
            Color of the background.
        left_padding (`int`, *optional*, defaults to 5):
            Padding on the left.
        right_padding (`int`, *optional*, defaults to 5):
            Padding on the right.
        top_padding (`int`, *optional*, defaults to 5):
            Padding on the top.
        bottom_padding (`int`, *optional*, defaults to 5):
            Padding on the bottom.
        font_bytes (`bytes`, *optional*):
            Bytes of the font to use. Ignored when `font_path` is also given. If both are `None`, the default
            font will be used.
        font_path (`str`, *optional*):
            Path to the font to use. Takes precedence over `font_bytes`. If both are `None`, the default font
            will be used.

    Returns:
        `Image.Image`: A new RGB image containing the wrapped text on `background_color`.
    """
    import warnings

    # Add new lines so that each line is no more than 80 characters.
    wrapper = textwrap.TextWrapper(width=80)
    wrapped_text = "\n".join(wrapper.wrap(text=text))

    # Resolve the font source. An explicit path wins over raw bytes; with
    # neither, fall back to the default font as the docstring promises
    # (previously this branch downloaded the font and then raised anyway).
    if font_path is not None:
        font_source = font_path
    elif font_bytes is not None:
        font_source = io.BytesIO(font_bytes)
    else:
        font_source = hf_hub_download(DEFAULT_FONT_PATH, "Arial.TTF")
        warnings.warn(
            "Neither font_bytes nor font_path was provided. "
            f"Using default font {font_source}.",
            stacklevel=2,
        )
    font = ImageFont.truetype(font_source, encoding="UTF-8", size=text_size)

    # Use a temporary canvas to determine the width and height in pixels when
    # rendering the text.
    temp_draw = ImageDraw.Draw(Image.new("RGB", (1, 1), background_color))
    _, _, text_width, text_height = temp_draw.textbbox((0, 0), wrapped_text, font)

    # Create the actual image with a bit of padding around the text.
    image_width = text_width + left_padding + right_padding
    image_height = text_height + top_padding + bottom_padding
    image = Image.new("RGB", (image_width, image_height), background_color)
    draw = ImageDraw.Draw(image)
    draw.text(
        xy=(left_padding, top_padding), text=wrapped_text, fill=text_color, font=font
    )
    return image


# Adapted from https://github.com/google-research/pix2struct/blob/0e1779af0f4db4b652c1d92b3bbd2550a7399123/pix2struct/preprocessing/preprocessing_utils.py#L87
def render_header(
    image: Image.Image, header: str, bbox: Dict[str, float], font_path: str, **kwargs
) -> Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
    """
    Renders the input text as a header on the input image and updates the bounding box.

    The header is rendered with `render_text`, then header and image are both
    scaled to a common width (the larger of the two) and stacked vertically,
    header on top.

    Args:
        image (Image.Image):
            The image to render the header on.
        header (str):
            The header text.
        bbox (Dict[str,float]):
            The bounding box in relative position (0-1), format {"xmin": 0,
                                                                 "ymin": 0,
                                                                 "xmax": 0,
                                                                 "ymax": 0}.
        font_path (str):
            Path to the font file used to render the header. Must exist.
        **kwargs:
            Forwarded unchanged to `render_text` (e.g. `text_size`, paddings).

    Returns:
        Tuple[Image.Image, Dict[str, float], Dict[str, int]]:
        The image with the header rendered, the updated bounding box (same keys
        as `bbox`, relative to the new image), and the pixel dimensions of the
        result: "width", "height" (scaled input image only), "header_height"
        and "total_height".

    Raises:
        FileNotFoundError: If `font_path` does not exist.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.exists(font_path):
        raise FileNotFoundError(f"Font path {font_path} does not exist.")

    header_image = render_text(text=header, font_path=font_path, **kwargs)
    new_width = max(header_image.width, image.width)

    # Scale each part to the common width while keeping its aspect ratio.
    new_height = int(image.height * (new_width / image.width))
    new_header_height = int(header_image.height * (new_width / header_image.width))

    new_image = Image.new("RGB", (new_width, new_height + new_header_height), "white")
    new_image.paste(header_image.resize((new_width, new_header_height)), (0, 0))
    new_image.paste(image.resize((new_width, new_height)), (0, new_header_height))

    new_total_height = new_image.height

    # x coordinates are relative to the width, which both parts share, so they
    # are unchanged. y coordinates are converted to pixels within the scaled
    # image, shifted down by the header height, and re-normalised by the total
    # height.
    new_bbox = {
        "xmin": bbox["xmin"],
        "ymin": ((bbox["ymin"] * new_height) + new_header_height)
        / new_total_height,  # shift ymin down by the header's height
        "xmax": bbox["xmax"],
        "ymax": ((bbox["ymax"] * new_height) + new_header_height)
        / new_total_height,  # shift ymax down by the header's height
    }

    return (
        new_image,
        new_bbox,
        {
            "width": new_width,
            "height": new_height,
            "header_height": new_header_height,
            "total_height": new_total_height,
        },
    )