246 lines
11 KiB
Python
246 lines
11 KiB
Python
from PIL import Image, ImageDraw, ImageFont, ImageFilter
|
|
import os, io, sys, numpy as np
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
|
|
from utils import romanize, intercepts, add_furigana
|
|
from logging_config import logger
|
|
from config import SOURCE_LANG, MAX_TRANSLATE, FONT_FILE, FONT_SIZE_MAX,FONT_SIZE_MIN, FONT_SIZE, LINE_SPACING, FONT_COLOUR, LINE_HEIGHT, TO_ROMANIZE, FILL_COLOUR, REGION, DRAW_TRANSLATIONS_MODE
|
|
|
|
# Module-level font, loaded once at import time from the configured file and size.
# NOTE(review): none of the functions visible in this module read this global (each one
# builds or receives its own font object) - presumably kept for importers; confirm before removing.
font = ImageFont.truetype(FONT_FILE, FONT_SIZE)
|
|
|
|
# TODO: refactor these helpers into a class so the same arguments do not have to be threaded through every call.
|
|
|
|
def modify_image_bytes(image_bytes: bytes | io.BytesIO, ocr_output, translation: list) -> bytes:
    """Draw the translations onto an encoded image and return the re-encoded bytes.

    Args:
        image_bytes: The encoded image, either as raw bytes or as an ``io.BytesIO`` buffer.
        ocr_output: OCR results; forwarded unchanged to ``draw_on_image``.
        translation: Translated phrases; forwarded unchanged to ``draw_on_image``.

    Returns:
        The modified image, saved with the same ``format`` PIL detected on the input.
    """
    # The parameter used to be annotated as io.BytesIO while the body wrapped it in
    # io.BytesIO(...), which only accepts bytes-like data. Accept both forms so the
    # annotation and the behaviour agree.
    raw = image_bytes.getvalue() if isinstance(image_bytes, io.BytesIO) else image_bytes

    with io.BytesIO(raw) as source_stream:
        image = Image.open(source_stream)
        draw = ImageDraw.Draw(image)
        draw_on_image(draw, translation, ocr_output, MAX_TRANSLATE)

        # Re-encode while the source stream is still open, so PIL can still read
        # from it if it needs to, and keep the original image format.
        with io.BytesIO() as output_stream:
            image.save(output_stream, format=image.format)
            return output_stream.getvalue()
|
|
|
|
def draw_on_image(draw: ImageDraw, translation: list, ocr_output: list, max_translate: int, draw_mode: str = DRAW_TRANSLATIONS_MODE) -> None:
    """Draw every translated phrase onto the image using the selected draw mode.

    Args:
        draw: Drawing context of the image being annotated.
        translation: Translated phrases, index-aligned with ``ocr_output``.
        ocr_output: Iterable of ``(position, untranslated_phrase, confidence)`` entries.
        max_translate: Not used by this function; kept so existing callers keep working.
        draw_mode: One of 'learn', 'translation_only', 'learn_cover' or
            'translation_only_cover'.

    Returns:
        None. The image behind ``draw`` is modified in place.
    """
    renderers = {
        'learn': draw_one_phrase_learn,
        'translation_only': draw_one_phrase_translation_only,
        'learn_cover': draw_one_phrase_learn_cover,
        'translation_only_cover': draw_one_phrase_translation_only_cover,
    }
    render_phrase = renderers.get(draw_mode)
    if render_phrase is None:
        # Previously an unrecognised mode looped over every phrase and silently drew
        # nothing; make the misconfiguration visible instead.
        logger.warning("Unknown draw mode %r; no translations were drawn", draw_mode)
        return

    # Boxes already placed on the image, shared between phrases so later ones can avoid them.
    bounding_boxes = []

    # zip stops at the shorter input: an API LLM may return fewer translations than
    # there are OCR phrases, in which case the surplus phrases are left untouched.
    for (position, untranslated_phrase, _confidence), translated_phrase in zip(ocr_output, translation):
        render_phrase(draw, translated_phrase, position, bounding_boxes, untranslated_phrase)
|
|
|
|
def draw_one_phrase_learn(draw: ImageDraw,
                          translated_phrase: str,
                          position: tuple, bounding_boxes: list,
                          untranslated_phrase: str) -> ImageDraw:
    """Draw an outlined box with the phrase's text lines above the original text.

    The box is placed directly above the OCR box, pulled left if it would cross
    REGION[2], clamped to the top of the image, and moved by adjust_if_intersects
    when it collides with a previously placed box. The final box is appended to
    ``bounding_boxes``.
    """
    text_lines = get_lines(untranslated_phrase, translated_phrase)

    # position holds four corners; only the top-left and bottom-right are used.
    top_left, _, bottom_right, _ = position
    font_size = get_font_size(top_left[1], bottom_right[1], FONT_SIZE_MAX, FONT_SIZE_MIN)
    box_width = get_max_width(text_lines, FONT_FILE, font_size)
    box_height = get_max_height(text_lines, font_size, LINE_SPACING)
    font = ImageFont.truetype(FONT_FILE, font_size)
    right_edge = REGION[2]

    # Keep the box on screen. Text near the edge may still end up crowded when
    # there are many phrases to place.
    if top_left[0] + box_width <= right_edge:
        x_onscreen = top_left[0]
    else:
        x_onscreen = right_edge - box_width
    y_onscreen = max(top_left[1] - box_height, 0)
    candidate_box = (x_onscreen, y_onscreen, x_onscreen + box_width, y_onscreen + box_height, untranslated_phrase)

    # Appends the (possibly shifted) box to bounding_boxes; read it back from there.
    adjust_if_intersects(x_onscreen, y_onscreen, candidate_box, bounding_boxes, untranslated_phrase, box_width, box_height)
    left, top, right, bottom, _ = bounding_boxes[-1]

    draw.rectangle([(left, top), (right, bottom)], outline="black", width=1)

    line_step = font_size + LINE_SPACING
    for line in text_lines:
        if FONT_COLOUR == 'rainbow':
            rainbow_text(draw, line, left, top, font)
        else:
            draw.text((left, top), line, fill=FONT_COLOUR, font=font)
        top += line_step
|
|
|
|
|
|
|
|
|
|
# NOTE: only horizontal text is supported for now; vertical text is on the TODO list.
|
|
def draw_one_phrase_translation_only_cover(draw: ImageDraw,
                                           translated_phrase: str,
                                           position: tuple, bounding_boxes: list,
                                           untranslated_phrase: str) -> ImageDraw:
    """Paint a filled rectangle at the OCR box's top-left and draw the translation on it.

    The font size starts from get_font_size and shrinks one point at a time until
    the translation is narrower than the OCR box. If it still does not fit at
    FONT_SIZE_MIN, nothing is drawn for this phrase.
    """
    top_left, _, bottom_right, _ = position

    # Record the raw OCR box (kept for debugging purposes).
    bounding_boxes.append((top_left[0], top_left[1], bottom_right[0], bottom_right[1], untranslated_phrase))

    available_width = bottom_right[0] - top_left[0]
    size = get_font_size(top_left[1], bottom_right[1], FONT_SIZE_MAX, FONT_SIZE_MIN)

    while True:
        font = ImageFont.truetype(FONT_FILE, size)
        phrase_width = get_max_width(translated_phrase, FONT_FILE, size)
        backdrop = get_rectangle_coordinates(translated_phrase, top_left, FONT_FILE, size, LINE_SPACING)

        if not phrase_width < available_width:
            if size <= FONT_SIZE_MIN:
                # Too wide even at the smallest allowed size: give up without drawing.
                return
            size -= 1
            continue

        # The text fits: cover the area it will occupy, then draw it.
        draw.rectangle(backdrop, fill=FILL_COLOUR)
        if FONT_COLOUR == 'rainbow':
            rainbow_text(draw, translated_phrase, *top_left, font)
        else:
            draw.text(top_left, translated_phrase, fill=FONT_COLOUR, font=font)
        return
|
|
|
|
def draw_one_phrase_learn_cover(draw: ImageDraw,
                                translated_phrase: str,
                                position: tuple, bounding_boxes: list,
                                untranslated_phrase: str) -> ImageDraw:
    """Draw a filled, rounded box with the phrase's text lines over the original text.

    The box starts one third of its own height above the OCR box's top edge, is
    pulled left if it would cross REGION[2], clamped to the top of the image, and
    moved by adjust_if_intersects when it collides with a previously placed box.
    The final box is appended to ``bounding_boxes``.
    """
    text_lines = get_lines(untranslated_phrase, translated_phrase)

    # position holds four corners; only the top-left and bottom-right are used.
    top_left, _, bottom_right, _ = position
    font_size = get_font_size(top_left[1], bottom_right[1], FONT_SIZE_MAX, FONT_SIZE_MIN)
    box_width = get_max_width(text_lines, FONT_FILE, font_size)
    box_height = get_max_height(text_lines, font_size, LINE_SPACING)
    font = ImageFont.truetype(FONT_FILE, font_size)
    right_edge = REGION[2]

    # Keep the box on screen. Text near the edge may still end up crowded when
    # there are many phrases to place.
    if top_left[0] + box_width <= right_edge:
        x_onscreen = top_left[0]
    else:
        x_onscreen = right_edge - box_width
    y_onscreen = max(top_left[1] - int(box_height/3), 0)
    candidate_box = (x_onscreen, y_onscreen, x_onscreen + box_width, y_onscreen + box_height, untranslated_phrase)

    # Appends the (possibly shifted) box to bounding_boxes; read it back from there.
    adjust_if_intersects(x_onscreen, y_onscreen, candidate_box, bounding_boxes, untranslated_phrase, box_width, box_height)
    left, top, right, bottom, _ = bounding_boxes[-1]

    draw.rounded_rectangle([(left, top), (right, bottom)], fill=FILL_COLOUR, outline="black", width=2, radius=5)

    line_step = font_size + LINE_SPACING
    for line in text_lines:
        if FONT_COLOUR == 'rainbow':  # easter egg
            rainbow_text(draw, line, left, top, font)
        else:
            draw.text((left, top), line, fill=FONT_COLOUR, font=font)
        top += line_step
|
|
|
|
def draw_one_phrase_translation_only(draw: ImageDraw,
                                     translated_phrase: str,
                                     position: tuple, bounding_boxes: list,
                                     untranslated_phrase: str) -> ImageDraw:
    """Placeholder for the 'translation_only' draw mode.

    Not implemented: the function ignores all of its arguments, draws nothing,
    leaves ``bounding_boxes`` untouched and returns None.
    """
    return None
|
|
|
|
def get_rectangle_coordinates(lines: list | str, top_left: tuple | list, font_path, font_size, line_spacing, padding: int = 1) -> list:
    """Return the padded rectangle enclosing the text as ``[(x1, y1), (x2, y2)]``.

    The rectangle starts at ``top_left`` and spans the width from get_max_width
    and the height from get_max_height, grown by ``padding`` on every side.
    """
    width = get_max_width(lines, font_path, font_size)
    height = get_max_height(lines, font_size, line_spacing)

    left, top = top_left[0], top_left[1]
    upper_left = (left - padding, top - padding)
    lower_right = (left + width + padding, top + height + padding)
    return [upper_left, lower_right]
|
|
|
|
def get_max_width(lines: list | str, font_path, font_size) -> int:
    """Return the rendered width of the text, or of the widest line for a list.

    Widths are taken from the text bounding box (right minus left) on a throwaway
    1x1 image. An empty list yields 0.
    """
    font = ImageFont.truetype(font_path, font_size)
    # A scratch drawing context is only needed to access textbbox.
    scratch = ImageDraw.Draw(Image.new("RGB", (1, 1)))

    def measure(text):
        left, _, right, _ = scratch.textbbox((0, 0), text, font=font)
        return right - left

    if not isinstance(lines, list):
        return measure(lines)

    # Seed with 0 so an empty list (or all zero-width lines) returns 0.
    return max([0, *(measure(line) for line in lines)])
|
|
|
|
def get_max_height(lines: list | str, font_size, line_spacing) -> int:
|
|
"""Get the maximum height of the text lines"""
|
|
no_of_lines = len(lines) if isinstance(lines, list) else 1
|
|
return no_of_lines * (font_size + line_spacing)
|
|
|
|
def get_lines(untranslated_phrase: str, translated_phrase: str) -> list:
    """Return the text lines to draw for one phrase.

    The result holds the translated text first, then the romanised text (only
    when TO_ROMANIZE is set), then the original text. When SOURCE_LANG is 'ja'
    the original text is first passed through add_furigana, and that annotated
    text is what gets romanised and displayed. A newline inside any part yields
    additional lines.

    Args:
        untranslated_phrase: The phrase as recognised by OCR.
        translated_phrase: Its translation.

    Returns:
        A list of strings, one per line to draw.
    """
    if SOURCE_LANG == 'ja':
        untranslated_phrase = add_furigana(untranslated_phrase)

    if TO_ROMANIZE:
        # Romanise only when the result is displayed; it used to be computed for
        # every phrase and then discarded when TO_ROMANIZE was off.
        romanized_phrase = romanize(untranslated_phrase, SOURCE_LANG)
        text_content = f"{translated_phrase}\n{romanized_phrase}\n{untranslated_phrase}"
    else:
        text_content = f"{translated_phrase}\n{untranslated_phrase}"

    return text_content.split('\n')
|
|
|
|
|
|
def adjust_if_intersects(x: int, y: int,
                         bounding_box: tuple, bounding_boxes: list,
                         untranslated_phrase: str,
                         max_width: int, total_height: int) -> tuple:
    """Push the new box down past any earlier box it overlaps, then record it.

    Earlier boxes are visited in list order; each time the new box overlaps one
    in both x and y, its top is moved to below that box plus LINE_SPACING. This
    relies on OCR returning results from top to bottom. The final box is
    appended to ``bounding_boxes`` and returned.
    """
    # Only the horizontal extent of the proposed box is used; x never changes.
    new_x_span = (bounding_box[0], bounding_box[2])
    top = np.max([y, 0])

    for placed in bounding_boxes:
        overlaps_x = intercepts((placed[0], placed[2]), new_x_span)
        if overlaps_x and intercepts((placed[1], placed[3]), (top, top + total_height)):
            top = np.max([top, placed[3]]) + LINE_SPACING

    adjusted_bounding_box = (x, top, x + max_width, top + total_height, untranslated_phrase)
    bounding_boxes.append(adjusted_bounding_box)
    return adjusted_bounding_box
|
|
|
|
|
|
def get_font_size(y_1, y_2, font_size_max: int, font_size_min: int) -> int:
    """Return a font size of two thirds of the box height, clamped to the allowed range.

    The height is ``abs(y_2 - y_1)``; the scaled value is truncated to an int and
    then limited to ``[font_size_min, font_size_max]``.

    Raises:
        ValueError: If ``font_size_min`` is greater than ``font_size_max``.
    """
    if font_size_min > font_size_max:
        raise ValueError("Minimum font size cannot be greater than maximum font size")

    scaled = int(abs(2/3*(y_2-y_1)))
    if scaled < font_size_min:
        return font_size_min
    if scaled > font_size_max:
        return font_size_max
    return scaled
|
|
|
|
|
|
|
|
|
|
def rainbow_text(draw,text,x,y,font):
    """Draw ``text`` one character at a time, each in its own random colour.

    Drawing starts at ``(x, y)``; after each character the x position advances by
    that character's bounding-box width.
    """
    cursor_x = x
    for letter in text:
        # Random RGB colour per character (no hue sweep), from np.random.randint(50, 255, 3).
        colour = tuple(np.random.randint(50,255,3))

        # Measure before drawing so the next character starts after this one.
        left, _, right, _ = draw.textbbox((cursor_x, y), letter, font=font)
        draw.text((cursor_x, y), letter, fill=colour, font=font)
        cursor_x += right - left
|
|
|
|
|
|
# Running this file directly is a no-op: the guard body is only `pass`.
if __name__ == "__main__":
    pass
|
|
|