import easyocr
from pypinyin import pinyin
from PIL import Image, ImageDraw, ImageFont
import os, time, logging, torch, subprocess
from helpers.translation import init_M2M, translate_M2M
import langid
import numpy as np

##### Variables to edit

text_color = "#ff0000"
font_file = "/home/James/.local/share/fonts/Arial-Unicode-Bold.ttf"
font_size = 16

# Paths for the raw screenshot and the annotated output image.
# NOTE: these names are used below but were never defined in the original script;
# the paths here are assumed placeholders -- adjust them to your setup.
image_old = "/tmp/ocr_screenshot.png"
image_new = "/tmp/ocr_translated.png"

pyin = True  # whether to add pinyin or not
max_translate = 100  # maximum number of OCR regions to translate per pass

# Language-detection filter: OCR'd text is only kept (and drawn) when langid classifies it as src_lang
src_lang = "zh"
tgt_lang = "en"
# languages supported by langid:
# af, am, an, ar, as, az, be, bg, bn, br, bs, ca, cs, cy, da, de, dz, el, en, eo, es, et, eu, fa, fi, fo, fr, ga, gl, gu, he, hi, hr, ht, hu, hy, id, is, it, ja, jv, ka, kk, km, kn, ko, ku, ky, la, lb, lo, lt, lv, mg, mk, ml, mn, mr, ms, mt, nb, ne, nl, nn, no, oc, or, pa, pl, ps, pt, qu, ro, ru, rw, se, si, sk, sl, sq, sr, sv, sw, ta, te, th, tl, tr, ug, uk, ur, vi, vo, wa, xh, zh, zu
langid.set_languages([src_lang, tgt_lang, 'en'])  # restrict langid to these candidates
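# Illustrative example of what langid.classify returns (a (language, score) tuple;
# the exact score will vary):
#   langid.classify("你好，世界")  ->  ('zh', -23.7)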

# for translator (M2M100)
from_lang = "zh"
target_lang = "en"

# languages supported by M2M100:
# Afrikaans (af), Amharic (am), Arabic (ar), Asturian (ast), Azerbaijani (az), Bashkir (ba), Belarusian (be), Bulgarian (bg), Bengali (bn), Breton (br), Bosnian (bs), Catalan; Valencian (ca), Cebuano (ceb), Czech (cs), Welsh (cy), Danish (da), German (de), Greek (el), English (en), Spanish (es), Estonian (et), Persian (fa), Fulah (ff), Finnish (fi), French (fr), Western Frisian (fy), Irish (ga), Gaelic; Scottish Gaelic (gd), Galician (gl), Gujarati (gu), Hausa (ha), Hebrew (he), Hindi (hi), Croatian (hr), Haitian; Haitian Creole (ht), Hungarian (hu), Armenian (hy), Indonesian (id), Igbo (ig), Iloko (ilo), Icelandic (is), Italian (it), Japanese (ja), Javanese (jv), Georgian (ka), Kazakh (kk), Central Khmer (km), Kannada (kn), Korean (ko), Luxembourgish; Letzeburgesch (lb), Ganda (lg), Lingala (ln), Lao (lo), Lithuanian (lt), Latvian (lv), Malagasy (mg), Macedonian (mk), Malayalam (ml), Mongolian (mn), Marathi (mr), Malay (ms), Burmese (my), Nepali (ne), Dutch; Flemish (nl), Norwegian (no), Northern Sotho (ns), Occitan (post 1500) (oc), Oriya (or), Panjabi; Punjabi (pa), Polish (pl), Pushto; Pashto (ps), Portuguese (pt), Romanian; Moldavian; Moldovan (ro), Russian (ru), Sindhi (sd), Sinhala; Sinhalese (si), Slovak (sk), Slovenian (sl), Somali (so), Albanian (sq), Serbian (sr), Swati (ss), Sundanese (su), Swedish (sv), Swahili (sw), Tamil (ta), Thai (th), Tagalog (tl), Tswana (tn), Turkish (tr), Ukrainian (uk), Urdu (ur), Uzbek (uz), Vietnamese (vi), Wolof (wo), Xhosa (xh), Yiddish (yi), Yoruba (yo), Chinese (zh), Zulu (zu)
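# Illustrative sketch of how translate_M2M is used below (helpers.translation is a
# project-local helper, so the exact return value is assumed): it takes a list of
# strings and returns the translations in the same order, e.g.
#   translate_M2M(["你好"], from_lang="zh", target_lang="en")  ->  ["Hello"]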

# for easyOCR
OCR_languages = ['ch_sim','en'] # languages to recognise
# https://www.jaided.ai/easyocr/

log_directory = '/var/log/ocr'
printsc = lambda x: subprocess.run(f"grim -t png -o DP-1 -l 0 {x}", shell=True)
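# The line above screenshots the display with grim (a screenshot tool for
# wlroots-based Wayland compositors): -t png sets the format, -o DP-1 captures only
# that output/monitor, -l 0 sets the PNG compression level, and the argument passed
# to printsc is the file to write. Adjust "DP-1" to the name of your own output.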

# Configure the logger
os.makedirs(log_directory, exist_ok=True)

logging.basicConfig(
    filename=os.path.join(log_directory, 'ocr.log'),
    level=logging.DEBUG,                 # set the logging level
    format='%(asctime)s - %(message)s',  # define the format for logging
    datefmt='%Y-%m-%d %H:%M:%S'          # define the date format
)
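# With the format above, each line in /var/log/ocr/ocr.log looks like
# (illustrative example):  2024-01-01 12:00:00 - Successfully translated image. ...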

# screenshot
printsc(image_old)
time.sleep(1)

# EasyOCR
reader = easyocr.Reader(OCR_languages) # this needs to run only once to load the model into memory
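# Note: the first Reader() call downloads the detection/recognition models if they
# are not cached yet, and it uses the GPU when one is available (pass gpu=False to
# force CPU) -- see the easyOCR documentation linked above.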

def results():
    result = reader.readtext(image_old)
    # keep only the entries whose text langid classifies as src_lang
    results_no_eng = [entry for entry in result if langid.classify(entry[1])[0] == src_lang]
    return results_no_eng

# reader.readtext() returns a list of entries with the structure:
#   ([top_left, top_right, bottom_right, bottom_left], text, confidence)
# where the first element is the bounding box given as four (x, y) corner points
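# Illustrative example of a single entry (coordinates and values are made up):
#   ([[10, 20], [120, 20], [120, 48], [10, 48]], '你好世界', 0.97)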
ocr_output = results()
curr_words = set(entry[1] for entry in ocr_output)
prev_words = set()

# translator = GoogleTranslator(source=from_language, target=target_language)

font = ImageFont.truetype(font_file, font_size)

# check whether two one-dimensional intervals overlap (used below to detect
# overlapping label boxes along one axis)
def intercepts(x, y):
    # both x and y are two-element tuples giving the endpoints of an interval on one axis
    x1, x2 = x
    y1, y2 = y
    return (x1 <= y1 <= x2) or (x1 <= y2 <= x2) or (y1 <= x1 <= y2) or (y1 <= x2 <= y2)
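# Illustrative usage of intercepts():
#   intercepts((0, 10), (5, 20))   -> True   (the intervals overlap)
#   intercepts((0, 10), (11, 20))  -> False  (they do not)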

while True:
    print('Running')
    if prev_words != curr_words:
        print('Translating')
        image = Image.open(image_old)
        draw = ImageDraw.Draw(image)
        # translate at most max_translate of the recognised text snippets in one batch
        to_translate = [entry[1] for entry in ocr_output][:max_translate]
        translation = translate_M2M(to_translate, from_lang=from_lang, target_lang=target_lang)
        # counter for limiting the number of translations drawn onto the image
        translated_number = 0
        bounding_boxes = []
        for i, (position, words, confidence) in enumerate(ocr_output):
            if translated_number >= max_translate:
                break
            word = translation[i]
            # try:
            top_left, _, _, _ = position
            # place the label block just above the recognised text
            position = (top_left[0], top_left[1] - 60)
            if pyin:
                py = ' '.join([p[0] for p in pinyin(words)])
                text_content = f"{word}\n{py}\n{words}"
            else:
                text_content = f"{word}\n{words}"
            lines = text_content.split('\n')
            x, y = position

            max_width = 0
            total_height = 0
            line_spacing = 3
            line_height = font_size

            # measure the widest line and the total height of the label block
            for line in lines:
                bbox = draw.textbbox(position, line, font=font)
                line_width = bbox[2] - bbox[0]
                max_width = max(max_width, line_width)
                total_height += line_height + line_spacing

            bounding_box = (x, y, x + max_width, y + total_height, words)
            print(f"Bounding Box of Interest: {bounding_box}")

            # clamp to the top of the image, then push the label below any
            # previously drawn label box that it overlaps with
            y = np.max([y, 0])
            if len(bounding_boxes) > 0:
                for box in bounding_boxes:
                    print(f'Investigating box: {box}')
                    if intercepts((box[0], box[2]), (bounding_box[0], bounding_box[2])) and intercepts((box[1], box[3]), (y, y + total_height)):
                        print(f'Overlapping, adjusting placement for {words}')
                        y = np.max([y, box[3]]) + line_spacing
                        print(y, box[3])
                        print(f'Changed to {(x, y, x + max_width, y + total_height, words)}')
            adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
            bounding_boxes.append(adjusted_bounding_box)
            draw.rectangle([(x, y), (x + max_width, y + total_height)], outline="black", width=1)
            position = (x, y)
            for line in lines:
                draw.text(position, line, fill=text_color, font=font)
                y += font_size + line_spacing
                position = (x, y)
            print("Adjusted_bounding_box:", adjusted_bounding_box)
            print('\n')
            # except Exception as e:
            #     logging.error(e)
            translated_number += 1
        image.save(image_new)
        logging.info(f"Saved the image to {image_new}")
        prev_words = curr_words
        logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
    else:
        logging.info("The image has remained the same.")
    torch.cuda.empty_cache()
    print('Sleeping')
    time.sleep(10)

    # take a fresh screenshot and re-run OCR so the next iteration can detect changes
    printsc(image_old)
    ocr_output = results()
    curr_words = set(entry[1] for entry in ocr_output)
    logging.info(f'Curr words are:\n{curr_words}')