"""Draw translated (and optionally romanised) text onto OCR'd images."""
from functools import lru_cache
import io
import os
import sys

import numpy as np
from PIL import Image, ImageDraw, ImageFont

# The helper modules live in ./helpers, so that directory has to be on the
# import path before they are imported.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))

from utils import romanize, intercepts, add_furigana
from logging_config import logger
from config import ADD_OVERLAY, SOURCE_LANG, MAX_TRANSLATE, FONT_FILE, FONT_SIZE, LINE_SPACING, FONT_COLOUR, LINE_HEIGHT, TO_ROMANIZE, FILL_COLOUR, REGION
from PySide6.QtGui import QFont


@lru_cache(maxsize=128)
def _load_font(font_path, font_size):
    """Load a TrueType font, caching by (path, size) to avoid re-reading the file."""
    return ImageFont.truetype(font_path, font_size)


# Default font used when text is added above the original phrase.
font = _load_font(FONT_FILE, FONT_SIZE)


def modify_image_bytes(image_bytes: bytes, ocr_output, translation: list) -> bytes:
    """Draw the translations onto an encoded image and return the new bytes.

    Args:
        image_bytes: The encoded image (any format Pillow can open).
        ocr_output: Iterable of ``(position, phrase, confidence)`` entries.
        translation: Translated phrases, index-aligned with ``ocr_output``.

    Returns:
        The modified image, encoded in the same format as the input.
    """
    with io.BytesIO(image_bytes) as source_stream:
        image = Image.open(source_stream)
        # Remember the format now; it is what the output is encoded as.
        image_format = image.format
        draw = ImageDraw.Draw(image)
        draw_on_image(draw, translation, ocr_output, MAX_TRANSLATE)

        # Save while the source stream is still open so the result never
        # depends on the pixel data having been fully loaded beforehand.
        with io.BytesIO() as output_stream:
            image.save(output_stream, format=image_format)
            return output_stream.getvalue()


def draw_on_image(draw: ImageDraw.ImageDraw, translation: list, ocr_output: list, max_translate: int, replace=False) -> ImageDraw.ImageDraw:
    """Draw the translation of every OCR'd phrase onto the image.

    Args:
        draw: Drawing context of the target image.
        translation: Translated phrases, index-aligned with ``ocr_output``.
        ocr_output: Iterable of ``(position, phrase, confidence)`` entries.
        max_translate: Maximum number of phrases to draw.
        replace: When true, paint over the original text and write the
            translation in its place; otherwise add a text box above it.

    Returns:
        The same ``draw`` object, for chaining.
    """
    logger.debug(f"Translations: {len(translation)} {translation}")
    logger.debug(f"OCR output: {len(ocr_output)} {ocr_output}")

    draw_phrase = draw_one_phrase_replace if replace else draw_one_phrase_add
    bounding_boxes = []
    for index, (position, untranslated_phrase, _confidence) in enumerate(ocr_output):
        logger.debug(f"Untranslated phrase: {untranslated_phrase}")
        # Draw at most `max_translate` phrases (the previous `- 1` dropped
        # the last allowed phrase).
        if index >= max_translate:
            break
        if index >= len(translation):
            logger.warning(
                f"Only {len(translation)} translations for {len(ocr_output)} OCR phrases; skipping the rest"
            )
            break
        draw_phrase(draw, translation[index], position, bounding_boxes, untranslated_phrase)
    return draw


def draw_one_phrase_add(draw: ImageDraw.ImageDraw, translated_phrase: str, position: tuple, bounding_boxes: list, untranslated_phrase: str) -> ImageDraw.ImageDraw:
    """Draw a boxed block of text above the original phrase.

    The block contains the translation, optionally the romanisation, and the
    original phrase (with furigana added for Japanese), one per line.

    Args:
        draw: Drawing context of the target image.
        translated_phrase: Translation to display.
        position: Four-element sequence; only the first element (used as the
            top-left ``(x, y)`` point) is read here.
        bounding_boxes: Boxes already placed; the new box is appended to it.
        untranslated_phrase: The original OCR'd text.

    Returns:
        The same ``draw`` object.
    """
    if SOURCE_LANG == 'ja':
        untranslated_phrase = add_furigana(untranslated_phrase)

    if TO_ROMANIZE:
        romanized_phrase = romanize(untranslated_phrase, SOURCE_LANG)
        text_content = f"{translated_phrase}\n{romanized_phrase}\n{untranslated_phrase}"
    else:
        text_content = f"{translated_phrase}\n{untranslated_phrase}"
    lines = text_content.split('\n')

    top_left, _, _, _ = position
    max_width = get_max_width(lines, FONT_FILE, FONT_SIZE)
    total_height = get_max_height(lines, FONT_SIZE, LINE_SPACING)
    right_edge = REGION[2]

    # Ensure the text is within the screen. P.S. Text on the edge may still be
    # squished together if there are too many to translate.
    if top_left[0] + max_width <= right_edge:
        x_onscreen = top_left[0]
    else:
        # Shift left so the box ends at the right edge, but never past x = 0.
        x_onscreen = max(right_edge - max_width, 0)
    y_onscreen = max(top_left[1] - total_height, 0)

    bounding_box = (x_onscreen, y_onscreen, x_onscreen + max_width, y_onscreen + total_height, untranslated_phrase)
    box_left, box_top, box_right, box_bottom, _ = adjust_if_intersects(
        x_onscreen, y_onscreen, bounding_box, bounding_boxes,
        untranslated_phrase, max_width, total_height,
    )

    draw.rectangle([(box_left, box_top), (box_right, box_bottom)], outline="black", width=1)

    text_y = box_top
    for line in lines:
        draw.text((box_left, text_y), line, fill=FONT_COLOUR, font=font)
        if ADD_OVERLAY:
            # NOTE(review): `overlay` is not defined or imported in this
            # module, so this raises NameError when ADD_OVERLAY is true
            # unless something injects it -- confirm where it comes from.
            overlay.add_next_text_at_position_no_update(box_left, text_y, line, text_color=FONT_COLOUR)
        text_y += FONT_SIZE + LINE_SPACING
    return draw


### Only support for horizontal text atm, vertical text is on the todo list
def draw_one_phrase_replace(draw: ImageDraw.ImageDraw, translated_phrase: str, position: tuple, bounding_boxes: list, untranslated_phrase: str) -> ImageDraw.ImageDraw:
    """Cover up the old text and write the translation directly on top.

    The box is filled with ``FILL_COLOUR`` and the translation is drawn with
    the largest font size (starting at the box height) whose rendered width
    fits inside the box. Nothing is written if no size fits.

    Args:
        draw: Drawing context of the target image.
        translated_phrase: Translation to display.
        position: Four-element sequence; the first element is used as the
            top-left point and the fourth as the bottom-right point.
        bounding_boxes: Unused; kept for signature parity with
            ``draw_one_phrase_add``.
        untranslated_phrase: Unused; kept for signature parity.

    Returns:
        The same ``draw`` object.
    """
    # NOTE(review): the fourth point is treated as the bottom-right corner.
    # Some OCR engines order corners clockwise from the top-left, which would
    # make it the bottom-left -- confirm against the OCR backend.
    top_left, _, _, bottom_right = position
    max_width = bottom_right[0] - top_left[0]
    font_size = int(bottom_right[1] - top_left[1])

    draw.rectangle([tuple(top_left), tuple(bottom_right)], fill=FILL_COLOUR)

    # Shrink the font until the phrase fits; a degenerate box (size < 1)
    # skips the loop instead of asking Pillow for an invalid font size.
    while font_size >= 1:
        candidate_font = _load_font(FONT_FILE, font_size)
        left, _, right, _ = draw.textbbox((0, 0), translated_phrase, font=candidate_font)
        if right - left <= max_width:
            draw.text(tuple(top_left), translated_phrase, fill=FONT_COLOUR, font=candidate_font)
            break
        font_size -= 1
    return draw


def get_max_width(lines: list, font_path, font_size) -> int:
    """Return the rendered pixel width of the widest line (0 for no lines)."""
    measuring_font = _load_font(font_path, font_size)
    # A throwaway 1x1 image is enough: textbbox only needs a drawing context.
    measurer = ImageDraw.Draw(Image.new("RGB", (1, 1)))
    max_width = 0
    for line in lines:
        left, _, right, _ = measurer.textbbox((0, 0), line, font=measuring_font)
        max_width = max(max_width, right - left)
    return max_width


def get_max_height(lines: list, font_size, line_spacing) -> int:
    """Return the total height of the lines: one font size plus spacing per line."""
    return len(lines) * (font_size + line_spacing)


def adjust_if_intersects(x: int, y: int, bounding_box: tuple, bounding_boxes: list, untranslated_phrase: str, max_width: int, total_height: int) -> tuple:
    """Push a new box down past already-placed boxes it collides with.

    Boxes are checked once, in placement order; whenever the candidate
    collides with one, it is moved to just below that box (plus
    ``LINE_SPACING``). The final box is appended to ``bounding_boxes``.

    Args:
        x: Left edge of the new box.
        y: Proposed top edge of the new box (clamped to >= 0).
        bounding_box: The proposed box; only its x-range (items 0 and 2) is used.
        bounding_boxes: Boxes placed so far; mutated by appending the result.
        untranslated_phrase: Stored as the fifth element of the result.
        max_width: Width of the new box.
        total_height: Height of the new box.

    Returns:
        ``(x, y, x + max_width, y + total_height, untranslated_phrase)`` with
        the adjusted ``y``.
    """
    y = max(y, 0)
    new_x_range = (bounding_box[0], bounding_box[2])
    for box in bounding_boxes:
        # `intercepts` is presumably a 1-D interval-overlap test (see utils);
        # a collision needs overlap on both axes.
        overlaps_x = intercepts((box[0], box[2]), new_x_range)
        if overlaps_x and intercepts((box[1], box[3]), (y, y + total_height)):
            y = max(y, box[3]) + LINE_SPACING

    adjusted_bounding_box = (x, y, x + max_width, y + total_height, untranslated_phrase)
    bounding_boxes.append(adjusted_bounding_box)
    return adjusted_bounding_box