onscreen-translator/draw_translation.py
2024-11-01 15:44:12 +11:00

154 lines
7.4 KiB
Python
Executable File

import easyocr
from pypinyin import pinyin
from PIL import Image, ImageDraw, ImageFont
import os, time, logging, torch, subprocess
from helpers.translation import init_M2M, translate_M2M
import langid
import numpy as np
##### Variables to edit
text_color = "#ff0000"
font_file = "/home/James/.local/share/fonts/Arial-Unicode-Bold.ttf"
font_size = 16
pyin = True  # whether to add pinyin above the original text or not
max_translate = 100  # upper bound on OCR regions translated per frame
# Screenshot paths: image_old is the raw capture that gets OCR'd; image_new is
# the annotated copy with translations drawn on top.
# NOTE(review): these names were used throughout the script but never defined,
# which raised a NameError on the first printsc() call — defined here.
image_old = "/tmp/ocr_screenshot.png"
image_new = "/tmp/ocr_screenshot_translated.png"
# for detecting language to filter out other languages. Only writes the text when it is detected to be src_lang
src_lang = "zh"
tgt_lang = "en"
# af, am, an, ar, as, az, be, bg, bn, br, bs, ca, cs, cy, da, de, dz, el, en, eo, es, et, eu, fa, fi, fo, fr, ga, gl, gu, he, hi, hr, ht, hu, hy, id, is, it, ja, jv, ka, kk, km, kn, ko, ku, ky, la, lb, lo, lt, lv, mg, mk, ml, mn, mr, ms, mt, nb, ne, nl, nn, no, oc, or, pa, pl, ps, pt, qu, ro, ru, rw, se, si, sk, sl, sq, sr, sv, sw, ta, te, th, tl, tr, ug, uk, ur, vi, vo, wa, xh, zh, zu
langid.set_languages([src_lang, tgt_lang, 'en'])
# for translator (M2M100)
from_lang = "zh"
target_lang = "en"
# Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba), Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greeek (el), English (en), Spanish (es), Estonian (et), Persian (fa), Fulah (ff), Finnish (fi), French (fr), Western Frisian (fy), Irish (ga), Gaelic; Scottish Gaelic (gd), Galician (gl), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Croatian (hr), Haitian; Haitian Creole (ht), Hungarian (hu), Armenian (hy), Indonesian (id), Igbo (ig), Iloko (ilo), Icelandic (is), Italian (it), Japanese (ja), Javanese (jv), Georgian (ka), Kazakh (kk), Central Khmer (km), Kannada (kn), Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk), Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn), Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)
# for easyOCR
OCR_languages = ['ch_sim', 'en']  # languages to recognise
# https://www.jaided.ai/easyocr/
log_directory = '/var/log/ocr'  # needs write access (typically root-owned)


def printsc(path):
    """Capture Wayland output DP-1 to *path* as an uncompressed PNG via grim."""
    # Argument list instead of a shell=True f-string: the path is never
    # parsed by a shell, so spaces/metacharacters in it cannot break the call.
    subprocess.run(["grim", "-t", "png", "-o", "DP-1", "-l", "0", path])


# Configure the logger
os.makedirs(log_directory, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(log_directory, 'ocr.log'),
    level=logging.DEBUG,  # Set the logging level
    format='%(asctime)s - %(message)s',  # Define the format for logging
    datefmt='%Y-%m-%d %H:%M:%S',  # Define the date format
)
# Take the initial screenshot and give grim a moment to finish writing it
printsc(image_old)
time.sleep(1)
# EasyOCR
# EasyOCR
reader = easyocr.Reader(OCR_languages)  # this needs to run only once to load the model into memory


def results():
    """OCR the current screenshot and keep only text classified as src_lang."""
    result = reader.readtext(image_old)
    # langid.classify returns (lang_code, score); drop anything not in src_lang
    return [entry for entry in result if langid.classify(entry[1])[0] == src_lang]


# reader.readtext returns a list of (bounding_box, text, confidence) entries,
# where bounding_box = [top_left, top_right, bottom_right, bottom_left] corner
# points of the detected text region.
ocr_output = results()
curr_words = set(entry[1] for entry in ocr_output)
prev_words = set()
# Alternative translator kept for reference; M2M100 (translate_M2M) is used instead.
# translator = GoogleTranslator(source=from_language, target=target_language)
# Overlay font; the file must provide glyphs for the source language
# (presumably why a Unicode font is configured above — confirm coverage).
font = ImageFont.truetype(font_file, font_size)
def intercepts(x, y):
    """Return True when the 1-D intervals *x* and *y* overlap.

    Each argument is a two-tuple (start, end) describing a segment along a
    single axis of a shape; touching endpoints count as an overlap.
    """
    a_lo, a_hi = x
    b_lo, b_hi = y
    # Overlap iff either interval has an endpoint inside the other.
    b_endpoint_in_a = (a_lo <= b_lo <= a_hi) or (a_lo <= b_hi <= a_hi)
    a_endpoint_in_b = (b_lo <= a_lo <= b_hi) or (b_lo <= a_hi <= b_hi)
    return b_endpoint_in_a or a_endpoint_in_b
def _overlay_text(translated, words):
    """Build the multi-line overlay string: translation, optional pinyin, original text."""
    if pyin:
        py = ' '.join(p[0] for p in pinyin(words))
        return f"{translated}\n{py}\n{words}"
    return f"{translated}\n{words}"


# Main loop: re-screenshot every 10 s, and redraw translations only when the
# set of recognised words has changed since the previous frame.
while True:
    logging.debug('Running')
    if prev_words != curr_words:
        logging.debug('Translating')
        image = Image.open(image_old)
        draw = ImageDraw.Draw(image)
        # Batch-translate at most max_translate detected regions in one call
        to_translate = [entry[1] for entry in ocr_output][:max_translate]
        translation = translate_M2M(to_translate, from_lang=from_lang, target_lang=target_lang)
        translated_number = 0  # counter for limiting the number of translations
        bounding_boxes = []  # boxes already drawn; used to push later boxes out of overlaps
        for i, (position, words, confidence) in enumerate(ocr_output):
            if translated_number >= max_translate:
                break
            # Anchor the overlay 60 px above the detected region's top-left corner
            top_left = position[0]
            x, y = top_left[0], top_left[1] - 60
            text_content = _overlay_text(translation[i], words)
            lines = text_content.split('\n')
            # Measure the text block: widest rendered line, summed line heights
            line_spacing = 3
            line_height = font_size
            max_width = 0
            total_height = 0
            for line in lines:
                bbox = draw.textbbox((x, y), line, font=font)
                max_width = max(max_width, bbox[2] - bbox[0])
                total_height += line_height + line_spacing
            # Horizontal extent is checked against the pre-clamp box, matching
            # the original placement of the text on screen.
            bounding_box = (x, y, x + max_width, y + total_height, words)
            y = max(y, 0)  # keep the overlay from running off the top edge
            # Shift this block below every previously drawn box it would overlap
            for box in bounding_boxes:
                if intercepts((box[0], box[2]), (bounding_box[0], bounding_box[2])) and \
                        intercepts((box[1], box[3]), (y, y + total_height)):
                    logging.debug(f'Overlapping change adjustment to {words}')
                    y = max(y, box[3]) + line_spacing
            adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
            bounding_boxes.append(adjusted_bounding_box)
            draw.rectangle([(x, y), (x + max_width, y + total_height)], outline="black", width=1)
            # Draw line by line, advancing y one line height per line
            for line in lines:
                draw.text((x, y), line, fill=text_color, font=font)
                y += font_size + line_spacing
            translated_number += 1
        image.save(image_new)
        logging.info(f"Saved the image to {image_new}")
        prev_words = curr_words
        logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
    else:
        logging.info("The image has remained the same.")
    # Release cached GPU memory held by the OCR/translation models between frames
    torch.cuda.empty_cache()
    logging.debug('Sleeping')
    time.sleep(10)
    printsc(image_old)
    ocr_output = results()
    curr_words = set(entry[1] for entry in ocr_output)
    logging.info(f'Curr words are:\n{curr_words}')