# onscreen-translator/helpers/draw.py
from PIL import Image, ImageDraw, ImageFont
from dotenv import load_dotenv
import os
import io
import numpy as np
import ast
from helpers.utils import romanize, intercepts

load_dotenv()

MAX_TRANSLATE = int(os.getenv('MAX_TRANSLATION', 200))
FONT_FILE = os.getenv('FONT_FILE')
FONT_SIZE = int(os.getenv('FONT_SIZE', 16))
LINE_SPACING = int(os.getenv('LINE_SPACING', 3))
TEXT_COLOR = os.getenv('TEXT_COLOR', "#ff0000")
LINE_HEIGHT = FONT_SIZE
TO_ROMANIZE = ast.literal_eval(os.getenv('TO_ROMANIZE', 'True'))

font = ImageFont.truetype(FONT_FILE, FONT_SIZE)
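
# Example .env entries (illustrative values only; FONT_FILE must point to a real
# TrueType font on your system, and the other values below simply mirror the
# fallbacks used above):
#   MAX_TRANSLATION=200
#   FONT_FILE=/path/to/NotoSansJP-Regular.ttf
#   FONT_SIZE=16
#   LINE_SPACING=3
#   TEXT_COLOR=#ff0000
#   TO_ROMANIZE=True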

def modify_image_bytes(image_bytes: bytes, ocr_output, translation: list) -> bytes:
    """Overlay translations onto the image and return the modified bytes in the original format."""
    # Load the image from bytes
    with io.BytesIO(image_bytes) as byte_stream:
        image = Image.open(byte_stream)
        draw = ImageDraw.Draw(image)
        translate_image(draw, translation, ocr_output, MAX_TRANSLATE)

        # Save the modified image back to bytes without changing the format
        with io.BytesIO() as out_stream:
            image.save(out_stream, format=image.format)  # Save in original format
            modified_image_bytes = out_stream.getvalue()

    return modified_image_bytes

def translate_image(draw: ImageDraw.ImageDraw, translation: list, ocr_output: list, max_translate: int) -> ImageDraw.ImageDraw:
    """Draw up to max_translate translated phrases onto the image, tracking boxes to avoid overlaps."""
    translated_number = 0
    bounding_boxes = []
    for i, (position, untranslated_phrase, confidence) in enumerate(ocr_output):
        if translated_number >= max_translate:
            break
        translate_one_phrase(draw, translation[i], position, bounding_boxes, untranslated_phrase)
        translated_number += 1
    return draw

def translate_one_phrase(draw: ImageDraw.ImageDraw, translated_phrase: str, position: tuple, bounding_boxes: list, untranslated_phrase: str) -> ImageDraw.ImageDraw:
    """Draw one translated phrase, its romanization, and the original text above the OCR position."""
    # Place the text block just above the phrase's top-left corner
    top_left, _, _, _ = position
    position = (top_left[0], top_left[1] - 60)
    text_content = f"{translated_phrase}\n{romanize(untranslated_phrase, TO_ROMANIZE)}\n{untranslated_phrase}"
    lines = text_content.split('\n')
    x, y = position

    # Measure the widest line; the block height is fixed by the line count
    max_width = 0
    total_height = len(lines) * (LINE_HEIGHT + LINE_SPACING)
    for line in lines:
        bbox = draw.textbbox(position, line, font=font)
        line_width = bbox[2] - bbox[0]
        max_width = max(max_width, line_width)

    bounding_box = (x, y, x + max_width, y + total_height, untranslated_phrase)
    print(f"Bounding Box of Interest: {bounding_box}")

    # Shift the box down if it overlaps any previously drawn box, then outline it
    adjust_if_intersects(x, y, bounding_box, bounding_boxes, untranslated_phrase, max_width, total_height)
    adjusted_x, adjusted_y, adjusted_max_x, adjusted_max_y, _ = bounding_boxes[-1]
    draw.rectangle([(adjusted_x, adjusted_y), (adjusted_max_x, adjusted_max_y)], outline="black", width=1)

    # Draw each line of text inside the adjusted box
    position = (adjusted_x, adjusted_y)
    for line in lines:
        draw.text(position, line, fill=TEXT_COLOR, font=font)
        adjusted_y += FONT_SIZE + LINE_SPACING
        position = (adjusted_x, adjusted_y)
    print(f"Adjusted bounding box: {bounding_boxes[-1]}.\n")
    return draw

def adjust_if_intersects(x: int, y: int, bounding_box: tuple, bounding_boxes: list, untranslated_phrase: str, max_width: int, total_height: int) -> tuple:
    y = np.max([y, 0])
    if len(bounding_boxes) > 0:
        for box in bounding_boxes:
            print(f'Investigating box: {box}')
            if intercepts((box[0], box[2]), (bounding_box[0], bounding_box[2])) and intercepts((box[1], box[3]), (y, y + total_height)):
                print(f'Overlapping change adjustment to {untranslated_phrase}')
                y = np.max([y, box[3]]) + LINE_SPACING
                print(y, box[3])
                print(f'Changed to {(x, y, x + max_width, y + total_height, untranslated_phrase)}')
    adjusted_bounding_box = (x, y, x + max_width, y + total_height, untranslated_phrase)
    bounding_boxes.append(adjusted_bounding_box)
    return adjusted_bounding_box
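
if __name__ == "__main__":
    # Minimal smoke test (illustrative only, not part of the translator pipeline).
    # It assumes the .env values above are set, that each ocr_output entry has the
    # (corner_points, text, confidence) shape that translate_image unpacks, and that
    # translation is a parallel list of already-translated strings; the sample text,
    # coordinates, and output filename are arbitrary.
    sample = Image.new("RGB", (400, 200), "white")
    buffer = io.BytesIO()
    sample.save(buffer, format="PNG")

    ocr_output = [
        ([(20, 120), (120, 120), (120, 150), (20, 150)], "こんにちは", 0.95),
    ]
    translation = ["hello"]

    result = modify_image_bytes(buffer.getvalue(), ocr_output, translation)
    with open("translated_sample.png", "wb") as f:
        f.write(result)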