# onscreen-translator/chinese_to_eng.py
# (capture metadata: 2024-11-01 15:44:12 +11:00 · 132 lines · 5.7 KiB · Python)
###################################################################################
##### IMPORT LIBRARIES #####
import os, time, logging, ast
from helpers.translation import init_TRANSLATE, translate
from helpers.utils import intercepts, contains_lang, printsc, romanize, convert_image_to_bytes, bytes_to_image
from helpers.ocr import id_filtered, id_lang, get_words, get_positions, get_confidences, init_OCR
from logging_config import setup_logger
from helpers.draw import modify_image_bytes
###################################################################################
#### LOGGING ####
# Route this module's log output to 'chinese_to_eng.log' via the shared helper.
setup_logger('chinese_to_eng', log_file='chinese_to_eng.log')
###################################################################################
##### Variables to edit #####
# Seconds to sleep between capture/translate cycles.
# BUG FIX: int(os.getenv('INTERVAL')) raised TypeError when INTERVAL was unset;
# every other setting has a default, so give this one a default too.
INTERVAL = int(os.getenv('INTERVAL', '5'))
### available languages: 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'
SOURCE_LANG = os.getenv('SOURCE_LANG', 'ja')
TARGET_LANG = os.getenv('TARGET_LANG', 'en')
### Translation
# NOTE(review): TRANSLATION_MODEL is not read in this file — presumably consumed
# by helpers.translation; verify before removing.
TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'opus') # 'opus' or 'm2m' # opus is a lot more lightweight
# Cap on how many OCR entries are sent to the translator per frame.
MAX_TRANSLATE = 200
### OCR
OCR_USE_GPU = ast.literal_eval(os.getenv('OCR_USE_GPU', 'True'))
OCR_MODEL = os.getenv('OCR_MODEL', 'easy') # 'easy', 'paddle', 'rapid' ### easy is the most accurate, paddle is the fastest with CUDA and rapid is the fastest with CPU
# Screen region to capture as (left, top, right, bottom).
REGION = ast.literal_eval(os.getenv('REGION','(0,0,2560,1440)'))
###################################################################################
# Languages handed to the OCR engine; English is always included.
OCR_LANGUAGES = [SOURCE_LANG, TARGET_LANG, 'en']
# Most recently translated frame; written by main(), read elsewhere.
latest_image = None
def main():
    """Run the capture → OCR → translate → redraw loop forever.

    Every INTERVAL seconds a screenshot of REGION is taken, OCR'd, and —
    when the recognized words changed since the previous frame — translated
    and rendered back onto the image, which is stored in the module-level
    ``latest_image`` for consumers elsewhere.
    """
    global latest_image

    # Initial screenshot of the configured capture region.
    untranslated_image = printsc(REGION)
    byte_image = convert_image_to_bytes(untranslated_image)

    ###################################################################################
    ##### Initialize the OCR (once; reused for every frame) #####
    ocr = init_OCR(model=OCR_MODEL, ocr_languages=OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
    # BUG FIX: the OCR language was hard-coded to 'ja'; honor SOURCE_LANG so the
    # SOURCE_LANG env var actually takes effect.
    ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
    curr_words = set(get_words(ocr_output))
    prev_words = set()

    ##### Initialize the translation #####
    init_TRANSLATE()
    ###################################################################################
    while True:
        print('Running')
        if prev_words != curr_words:
            print('Translating')
            # entry[1] is the recognized text (entries are (position, words, confidence)).
            to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
            # BUG FIX: 'from_lang' and 'target_lang' were undefined names and raised
            # NameError on the first changed frame; use the module configuration.
            translation = translate(to_translate, SOURCE_LANG, TARGET_LANG)
            print(translation)
            translated_image = modify_image_bytes(byte_image, ocr_output, translation)
            latest_image = bytes_to_image(translated_image)
            prev_words = curr_words
            logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
        else:
            logging.info("The image has remained the same.")
        # torch.cuda.empty_cache()
        logging.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)
        # Grab the next frame and re-run OCR for the change check above.
        untranslated_image = printsc(REGION)
        byte_image = convert_image_to_bytes(untranslated_image)
        # BUG FIX: same hard-coded 'ja' as above; use SOURCE_LANG.
        ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
        curr_words = set(get_words(ocr_output))
        logging.info(f'Curr words to translate are:\n{curr_words}')


if __name__ == "__main__":
    main()
# NOTE(review): everything below is commented-out legacy drawing logic,
# apparently superseded by helpers.draw.modify_image_bytes — consider deleting.
# image = Image.open(SCREENSHOT_PATH)
# draw = ImageDraw.Draw(image)
# # set counter for limiting the number of translations
# translated_number = 0
# bounding_boxes = []
# for i, (position,words,confidence) in enumerate(ocr_output):
# if translated_number >= MAX_TRANSLATE:
# break
# # try:
# top_left, _, _, _ = position
# position = (top_left[0], top_left[1] - 60)
# text_content = f"{translation[i]}\n{romanize(words)}\n{words}"
# lines = text_content.split('\n')
# x,y = position
# max_width = 0
# total_height = 0
# line_spacing = 3
# line_height = FONT_SIZE
# for line in lines:
# bbox = draw.textbbox(position, line, font=font)
# line_width, _ = bbox[2] - bbox[0], bbox[3] - bbox[1]
# max_width = max(max_width, line_width)
# total_height += line_height + line_spacing
# bounding_box = (x, y, x + max_width, y + total_height, words)
# print(f"Bounding Box of Interest: {bounding_box}")
# y = np.max([y,0])
# if len(bounding_boxes) > 0:
# for box in bounding_boxes:
# print(f'Investigating box: {box}')
# if intercepts((box[0],box[2]),(bounding_box[0],bounding_box[2])) and intercepts((box[1],box[3]),(y, y+total_height)):
# print(f'Overlapping change adjustment to {words}')
# y = np.max([y,box[3]]) + line_spacing
# print(y, box[3])
# print(f'Changed to {(x,y, x+max_width, y+total_height, words)}')
# adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
# bounding_boxes.append(adjusted_bounding_box)
# draw.rectangle([(x,y), (x+max_width, y+total_height)], outline="black", width=1)
# position = (x,y)
# for line in lines:
# draw.text(position, line, fill= TEXT_COLOR, font=font)
# y += FONT_SIZE + line_spacing
# position = (x,y)
# print("Adjusted_bounding_box:",adjusted_bounding_box)
# print('\n')
# translated_number += 1