84 lines
3.6 KiB
Python
84 lines
3.6 KiB
Python
from PIL import Image, ImageDraw, ImageFont
|
|
import os,io, sys, numpy as np
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
|
|
from utils import romanize, intercepts, add_furigana
|
|
from logging_config import logger
|
|
from config import SOURCE_LANG, MAX_TRANSLATE, FONT_FILE, FONT_SIZE, LINE_SPACING, TEXT_COLOR, LINE_HEIGHT, TO_ROMANIZE
|
|
|
|
|
|
# Shared font for measuring and drawing every overlay label; loaded once at
# import time from the configured FONT_FILE at FONT_SIZE.
font = ImageFont.truetype(FONT_FILE, FONT_SIZE)
|
|
|
|
|
|
|
|
def modify_image_bytes(image_bytes: "bytes | io.BytesIO", ocr_output, translation: list) -> bytes:
    """Overlay translations on an encoded image and return it re-encoded.

    Args:
        image_bytes: The encoded image, either as raw bytes or as an
            ``io.BytesIO`` holding them.
        ocr_output: Sequence of ``(position, phrase, confidence)`` OCR
            entries; forwarded unchanged to ``translate_image``.
        translation: Translated phrases, index-aligned with ``ocr_output``.

    Returns:
        The annotated image, encoded in the same format as the input.
    """
    # The signature used to promise a BytesIO while the body wrapped the
    # argument in BytesIO (which needs bytes-like data).  Accept both so a
    # caller following either convention works.
    if isinstance(image_bytes, io.BytesIO):
        image_bytes = image_bytes.getvalue()

    with io.BytesIO(image_bytes) as source_stream:
        image = Image.open(source_stream)
        # Image.open is lazy: decode the pixels and remember the format
        # while the backing stream is still open.
        image.load()
        image_format = image.format
        draw = ImageDraw.Draw(image)
        translate_image(draw, translation, ocr_output, MAX_TRANSLATE)

    # Re-encode in the original format so callers get the same kind of file.
    with io.BytesIO() as output_stream:
        image.save(output_stream, format=image_format)
        return output_stream.getvalue()
|
|
|
|
def translate_image(draw: ImageDraw, translation: list, ocr_output: list, max_translate: int) -> ImageDraw:
    """Draw a translated label for each OCR entry, up to a fixed limit.

    Args:
        draw: Drawing context of the image being annotated.
        translation: Translated phrases, looked up by the index of the
            matching entry in ``ocr_output``.
        ocr_output: Entries of the form ``(position, phrase, confidence)``.
        max_translate: Maximum number of entries to label; processing stops
            once this many have been drawn.

    Returns:
        The same ``draw`` object that was passed in.
    """
    # Boxes of the labels placed so far, shared across phrases so that later
    # labels can be positioned relative to earlier ones.
    placed_boxes = []
    for index, (position, source_phrase, _confidence) in enumerate(ocr_output):
        # The enumerate index equals the number of phrases already drawn.
        if index >= max_translate:
            break
        translate_one_phrase(draw, translation[index], position, placed_boxes, source_phrase)
    return draw
|
|
|
|
def translate_one_phrase(draw: ImageDraw.ImageDraw, translated_phrase: str, position: tuple, bounding_boxes: list, untranslated_phrase: str) -> None:
    """Draw one translated phrase, with its source text, as an outlined label.

    The label shows the translation first, then (when ``TO_ROMANIZE`` is set)
    the romanized source text, then the source text itself.  It is anchored
    60 pixels above the top-left corner of the OCR region and then passed
    through ``adjust_if_intersects``, which may move it and records its box.

    Args:
        draw: Drawing context of the image being annotated.
        translated_phrase: Translation shown on the first line.
        position: OCR region given as four corner points; only the first
            (top-left) point is used.
        bounding_boxes: Boxes of labels already placed.  The box of this
            label is appended to it.
        untranslated_phrase: Recognised source text, shown on the last line.
            For Japanese sources it is first passed through ``add_furigana``.
    """
    top_left, _, _, _ = position
    label_offset = 60  # pixels above the top-left corner of the OCR region
    x, y = top_left[0], top_left[1] - label_offset

    if SOURCE_LANG == 'ja':
        untranslated_phrase = add_furigana(untranslated_phrase)

    if TO_ROMANIZE:
        # Romanize only when the result is displayed; previously this ran
        # unconditionally and the result was thrown away.
        romanized_phrase = romanize(untranslated_phrase, SOURCE_LANG)
        text_content = f"{translated_phrase}\n{romanized_phrase}\n{untranslated_phrase}"
    else:
        text_content = f"{translated_phrase}\n{untranslated_phrase}"

    # Splitting the joined text also breaks up newlines inside the phrases.
    lines = text_content.split('\n')

    # NOTE(review): the box height is derived from LINE_HEIGHT while the text
    # below advances by FONT_SIZE per line; confirm the two settings agree.
    total_height = len(lines) * (LINE_HEIGHT + LINE_SPACING)
    line_bboxes = (draw.textbbox((x, y), line, font=font) for line in lines)
    max_width = max((right - left for left, _, right, _ in line_bboxes), default=0)

    bounding_box = (x, y, x + max_width, y + total_height, untranslated_phrase)
    # adjust_if_intersects appends the final box to bounding_boxes and
    # returns it, so use the return value instead of re-reading the list.
    adjusted_x, adjusted_y, adjusted_max_x, adjusted_max_y, _ = adjust_if_intersects(
        x, y, bounding_box, bounding_boxes, untranslated_phrase, max_width, total_height
    )

    draw.rectangle([(adjusted_x, adjusted_y), (adjusted_max_x, adjusted_max_y)], outline="black", width=1)

    line_step = FONT_SIZE + LINE_SPACING
    for line in lines:
        draw.text((adjusted_x, adjusted_y), line, fill=TEXT_COLOR, font=font)
        adjusted_y += line_step
|
|
|
|
def adjust_if_intersects(x: int, y: int, bounding_box: tuple, bounding_boxes: list, untranslated_phrase: str, max_width: int, total_height: int) -> tuple:
    """Choose the final box for a label and record it in ``bounding_boxes``.

    The label keeps its ``x`` coordinate.  Its ``y`` coordinate is first
    raised to at least 0, then, for each recorded box in list order, moved
    below that box (plus ``LINE_SPACING``) whenever ``intercepts`` reports a
    hit on both the horizontal and the vertical extent.
    (``intercepts`` is an external helper; presumably a 1-D interval overlap
    test -- confirm against ``utils``.)

    Args:
        x: Left edge of the label.
        y: Requested top edge of the label.
        bounding_box: Requested box; only its horizontal extent
            (items 0 and 2) is read.
        bounding_boxes: Boxes recorded so far; the final box is appended.
        untranslated_phrase: Stored as the fifth item of the final box.
        max_width: Width of the label.
        total_height: Height of the label.

    Returns:
        ``(x, y, x + max_width, y + total_height, untranslated_phrase)`` with
        the final ``y``.
    """
    # Never start above y = 0.
    y = np.max([y, 0])

    for placed in bounding_boxes:
        # Horizontal test first; the vertical test only runs when it hits.
        if not intercepts((placed[0], placed[2]), (bounding_box[0], bounding_box[2])):
            continue
        if not intercepts((placed[1], placed[3]), (y, y + total_height)):
            continue
        # Hit on both axes: move below the recorded box, leaving a gap.
        y = np.max([y, placed[3]]) + LINE_SPACING

    final_box = (x, y, x + max_width, y + total_height, untranslated_phrase)
    bounding_boxes.append(final_box)
    return final_box
|
|
|
|
|
|
|