###################################################################################
##### IMPORT LIBRARIES #####
import os
import time
import logging
import ast

from helpers.translation import init_TRANSLATE, translate
from helpers.utils import intercepts, contains_lang, printsc, romanize, convert_image_to_bytes, bytes_to_image
from helpers.ocr import id_filtered, id_lang, get_words, get_positions, get_confidences, init_OCR
from logging_config import setup_logger
from helpers.draw import modify_image_bytes

###################################################################################
#### LOGGING ####
setup_logger('chinese_to_eng', log_file='chinese_to_eng.log')

###################################################################################
##### Variables to edit (all overridable via environment variables) #####

# Seconds to sleep between screenshot/translate cycles.
# BUG FIX: the original did int(os.getenv('INTERVAL')) with no default, which
# raises TypeError when the variable is unset; default to 5 seconds instead.
INTERVAL = int(os.getenv('INTERVAL', '5'))

### available languages: 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'
SOURCE_LANG = os.getenv('SOURCE_LANG', 'ja')
TARGET_LANG = os.getenv('TARGET_LANG', 'en')

### Translation
TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'opus')  # 'opus' or 'm2m'  # opus is a lot more lightweight
MAX_TRANSLATE = 200  # cap on how many OCR entries are sent to the translator per cycle

### OCR
# ast.literal_eval safely parses 'True'/'False' and the region tuple from env strings.
OCR_USE_GPU = ast.literal_eval(os.getenv('OCR_USE_GPU', 'True'))
OCR_MODEL = os.getenv('OCR_MODEL', 'easy')  # 'easy', 'paddle', 'rapid'
### easy is the most accurate, paddle is the fastest with CUDA and rapid is the fastest with CPU
REGION = ast.literal_eval(os.getenv('REGION', '(0,0,2560,1440)'))  # (left, top, right, bottom) screen area

###################################################################################
OCR_LANGUAGES = [SOURCE_LANG, TARGET_LANG, 'en']

# Most recent translated screenshot (PIL image); written by main(), read elsewhere.
latest_image = None


def _capture_and_ocr(ocr):
    """Screenshot REGION, run OCR on it, and return the intermediate results.

    Returns a tuple (byte_image, ocr_output, words):
      byte_image -- the raw screenshot encoded as bytes
      ocr_output -- OCR result entries; entry[1] is the recognized text
      words      -- set of recognized words, used for change detection
    """
    screenshot = printsc(REGION)
    byte_image = convert_image_to_bytes(screenshot)
    # BUG FIX: the source language was hard-coded to 'ja' here, silently
    # ignoring the SOURCE_LANG environment setting; use the configured value.
    ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
    return byte_image, ocr_output, set(get_words(ocr_output))


def main():
    """Continuously screenshot REGION, OCR it, translate any new text, and
    render the translation onto the image, storing it in `latest_image`.

    Runs forever, sleeping INTERVAL seconds between cycles. Translation is
    skipped when the recognized word set has not changed since the last cycle.
    """
    global latest_image

    ###################################################################################
    ##### Initialize the OCR #####
    ocr = init_OCR(model=OCR_MODEL, ocr_languages=OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
    byte_image, ocr_output, curr_words = _capture_and_ocr(ocr)
    prev_words = set()

    ##### Initialize the translation #####
    init_TRANSLATE()

    ###################################################################################
    while True:
        print('Running')
        if prev_words != curr_words:
            print('Translating')
            # entry[1] is the recognized text; limit the batch size.
            to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
            # BUG FIX: the original called translate(..., from_lang, target_lang)
            # with names that are defined nowhere in this file (NameError at
            # runtime); the configured languages are clearly what was intended.
            translation = translate(to_translate, SOURCE_LANG, TARGET_LANG)
            print(translation)
            translated_image = modify_image_bytes(byte_image, ocr_output, translation)
            latest_image = bytes_to_image(translated_image)
            prev_words = curr_words
            logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
        else:
            logging.info("The image has remained the same.")
        # torch.cuda.empty_cache()
        logging.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)
        byte_image, ocr_output, curr_words = _capture_and_ocr(ocr)
        logging.info(f'Curr words to translate are:\n{curr_words}')


if __name__ == "__main__":
    main()