"""Screen-capture OCR translator.

Repeatedly screenshots one monitor (via `grim`), runs EasyOCR on the capture,
keeps only the detections whose text classifies as `src_lang`, translates them
with M2M100, and saves an annotated copy of the screenshot with the
translation (and optionally pinyin) drawn above each detected text region.
Labels are nudged downward so they do not overlap one another.
"""

import logging
import os
import subprocess
import time

import numpy as np  # retained for module parity; no longer used after max() fix
import torch

import easyocr
import langid
from PIL import Image, ImageDraw, ImageFont
from pypinyin import pinyin

from helpers.translation import init_M2M, translate_M2M

##### Variables to edit
text_color = "#ff0000"
font_file = "/home/James/.local/share/fonts/Arial-Unicode-Bold.ttf"
font_size = 16
pyin = True          # whether to add pinyin or not
max_translate = 100  # cap on how many detections get translated per frame

# FIX: image_old / image_new were referenced throughout but never defined,
# which crashed the script with a NameError on the very first printsc() call.
# TODO(review): confirm the actual paths the deployment expects.
image_old = "/tmp/ocr_capture.png"     # raw screenshot fed to the OCR pass
image_new = "/tmp/ocr_translated.png"  # annotated output image

# Language detection, used to filter out other languages: a detection is only
# kept when langid classifies its text as src_lang.
# (langid supports ~97 codes — af, am, ar, ..., zh, zu; see the langid docs.)
src_lang = "zh"
tgt_lang = "en"
langid.set_languages([src_lang, tgt_lang, 'en'])

# Translator (M2M100) language pair.
# (See the M2M100 model card for the full 100-language code list.)
from_lang = "zh"
target_lang = "en"

# easyOCR: languages to recognise (https://www.jaided.ai/easyocr/).
OCR_languages = ['ch_sim', 'en']

log_directory = '/var/log/ocr'


def printsc(path):
    """Capture output DP-1 to *path* as a PNG using grim.

    FIX: the original invoked grim through a shell f-string (shell=True),
    which breaks on paths containing spaces and is a shell-injection hazard.
    An argument list avoids both problems.
    """
    subprocess.run(["grim", "-t", "png", "-o", "DP-1", "-l", "0", path])


# Configure the logger.
os.makedirs(log_directory, exist_ok=True)
logging.basicConfig(
    filename=os.path.join(log_directory, 'ocr.log'),
    level=logging.DEBUG,                  # Set the logging level
    format='%(asctime)s - %(message)s',   # Define the format for logging
    datefmt='%Y-%m-%d %H:%M:%S',          # Define the date format
)

# Initial screenshot so the first OCR pass has something to read.
printsc(image_old)
time.sleep(1)

# EasyOCR: this needs to run only once to load the model into memory.
reader = easyocr.Reader(OCR_languages)


def results():
    """OCR the current screenshot and keep only src_lang detections.

    Returns the subset of reader.readtext() entries whose text langid
    classifies as src_lang.  Each entry has the shape
    (bounding_box, text, confidence), where bounding_box is the list of
    the four corner points [top_left, top_right, bottom_right, bottom_left].
    """
    result = reader.readtext(image_old)
    return [entry for entry in result
            if langid.classify(entry[1])[0] == src_lang]


ocr_output = results()
curr_words = set(entry[1] for entry in ocr_output)
prev_words = set()

font = ImageFont.truetype(font_file, font_size)


def intercepts(x, y):
    """Return True if the 1-D intervals *x* and *y* overlap.

    Both arguments are (start, end) tuples representing the ends of a line
    segment along a single axis.
    """
    x1, x2 = x
    y1, y2 = y
    return (x1 <= y1 <= x2) or (x1 <= y2 <= x2) or (y1 <= x1 <= y2) or (y1 <= x2 <= y2)


while True:
    print('Running')
    if prev_words != curr_words:
        print('Translating')
        image = Image.open(image_old)
        draw = ImageDraw.Draw(image)

        to_translate = [entry[1] for entry in ocr_output][:max_translate]
        translation = translate_M2M(to_translate,
                                    from_lang=from_lang,
                                    target_lang=target_lang)

        translated_number = 0  # counter for limiting the number of translations
        bounding_boxes = []    # labels already placed this frame, for overlap checks
        for i, (position, words, confidence) in enumerate(ocr_output):
            if translated_number >= max_translate:
                break

            # Anchor the label 60 px above the detection's top-left corner.
            top_left = position[0]
            position = (top_left[0], top_left[1] - 60)

            if pyin:
                py = ' '.join([p[0] for p in pinyin(words)])
                text_content = f"{translation[i]}\n{py}\n{words}"
            else:
                text_content = f"{translation[i]}\n{words}"
            lines = text_content.split('\n')

            # Measure the label: widest rendered line x total stacked height.
            x, y = position
            max_width = 0
            total_height = 0
            line_spacing = 3
            line_height = font_size
            for line in lines:
                bbox = draw.textbbox(position, line, font=font)
                line_width = bbox[2] - bbox[0]
                max_width = max(max_width, line_width)
                total_height += line_height + line_spacing

            bounding_box = (x, y, x + max_width, y + total_height, words)
            print(f"Bounding Box of Interest: {bounding_box}")

            # Clamp the label onto the image, then push it below any
            # previously placed label whose box it would overlap.
            y = max(y, 0)
            for box in bounding_boxes:
                print(f'Investigating box: {box}')
                if (intercepts((box[0], box[2]), (bounding_box[0], bounding_box[2]))
                        and intercepts((box[1], box[3]), (y, y + total_height))):
                    print(f'Overlapping change adjustment to {words}')
                    y = max(y, box[3]) + line_spacing
                    print(y, box[3])
                    print(f'Changed to {(x, y, x + max_width, y + total_height, words)}')

            adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
            bounding_boxes.append(adjusted_bounding_box)

            # Draw the label frame, then each text line inside it.
            draw.rectangle([(x, y), (x + max_width, y + total_height)],
                           outline="black", width=1)
            position = (x, y)
            for line in lines:
                draw.text(position, line, fill=text_color, font=font)
                y += font_size + line_spacing
                position = (x, y)

            print("Adjusted_bounding_box:", adjusted_bounding_box)
            print('\n')
            translated_number += 1

        image.save(image_new)
        logging.info(f"Saved the image to {image_new}")
        prev_words = curr_words
        logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
    else:
        logging.info("The image has remained the same.")

    torch.cuda.empty_cache()
    print('Sleeping')
    time.sleep(10)
    printsc(image_old)
    ocr_output = results()
    curr_words = set(entry[1] for entry in ocr_output)
    logging.info(f'Curr words are:\n{curr_words}')