# onscreen-translator/chinese_to_eng.py
# (capture metadata: 2024-11-01 15:44:12 +11:00 · 132 lines · 5.7 KiB · Python)
###################################################################################
##### IMPORT LIBRARIES #####
import os, time, logging, ast
from helpers.translation import init_TRANSLATE, translate
from helpers.utils import intercepts, contains_lang, printsc, romanize, convert_image_to_bytes, bytes_to_image
from helpers.ocr import id_filtered, id_lang, get_words, get_positions, get_confidences, init_OCR
from logging_config import setup_logger
from helpers.draw import modify_image_bytes
###################################################################################
#### LOGGING ####
# Route this module's log output to 'chinese_to_eng.log' via the shared helper.
setup_logger('chinese_to_eng', log_file='chinese_to_eng.log')
###################################################################################
##### Variables to edit #####
# Seconds to sleep between capture/translate cycles.
# BUG FIX: int(os.getenv('INTERVAL')) raised TypeError when INTERVAL was unset;
# every other setting has a default, so give this one a default too.
INTERVAL = int(os.getenv('INTERVAL', '5'))
### available languages: 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'
SOURCE_LANG = os.getenv('SOURCE_LANG', 'ja')
TARGET_LANG = os.getenv('TARGET_LANG', 'en')
### Translation
# NOTE(review): TRANSLATION_MODEL is not read in this file — presumably consumed
# by helpers.translation; verify before removing.
TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'opus') # 'opus' or 'm2m' # opus is a lot more lightweight
# Cap on how many OCR entries are sent to the translator per frame.
MAX_TRANSLATE = 200
### OCR
OCR_USE_GPU = ast.literal_eval(os.getenv('OCR_USE_GPU', 'True'))
OCR_MODEL = os.getenv('OCR_MODEL', 'easy') # 'easy', 'paddle', 'rapid' ### easy is the most accurate, paddle is the fastest with CUDA and rapid is the fastest with CPU
# Screen region to capture as (left, top, right, bottom).
REGION = ast.literal_eval(os.getenv('REGION','(0,0,2560,1440)'))
###################################################################################
# Languages handed to the OCR engine; English is always included.
OCR_LANGUAGES = [SOURCE_LANG, TARGET_LANG, 'en']
# Most recently translated frame; written by main(), read elsewhere.
latest_image = None
def main():
    """Run the capture → OCR → translate → redraw loop forever.

    Every INTERVAL seconds a screenshot of REGION is taken, OCR'd, and —
    when the recognized words changed since the previous frame — translated
    and rendered back onto the image, which is stored in the module-level
    ``latest_image`` for consumers elsewhere.
    """
    global latest_image

    # Initial screenshot of the configured capture region.
    untranslated_image = printsc(REGION)
    byte_image = convert_image_to_bytes(untranslated_image)

    ###################################################################################
    ##### Initialize the OCR (once; reused for every frame) #####
    ocr = init_OCR(model=OCR_MODEL, ocr_languages=OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
    # BUG FIX: the OCR language was hard-coded to 'ja'; honor SOURCE_LANG so the
    # SOURCE_LANG env var actually takes effect.
    ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
    curr_words = set(get_words(ocr_output))
    prev_words = set()

    ##### Initialize the translation #####
    init_TRANSLATE()
    ###################################################################################
    while True:
        print('Running')
        if prev_words != curr_words:
            print('Translating')
            # entry[1] is the recognized text (entries are (position, words, confidence)).
            to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
            # BUG FIX: 'from_lang' and 'target_lang' were undefined names and raised
            # NameError on the first changed frame; use the module configuration.
            translation = translate(to_translate, SOURCE_LANG, TARGET_LANG)
            print(translation)
            translated_image = modify_image_bytes(byte_image, ocr_output, translation)
            latest_image = bytes_to_image(translated_image)
            prev_words = curr_words
            logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
        else:
            logging.info("The image has remained the same.")
        # torch.cuda.empty_cache()
        logging.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)
        # Grab the next frame and re-run OCR for the change check above.
        untranslated_image = printsc(REGION)
        byte_image = convert_image_to_bytes(untranslated_image)
        # BUG FIX: same hard-coded 'ja' as above; use SOURCE_LANG.
        ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
        curr_words = set(get_words(ocr_output))
        logging.info(f'Curr words to translate are:\n{curr_words}')


if __name__ == "__main__":
    main()
# NOTE(review): everything below is commented-out legacy drawing logic,
# apparently superseded by helpers.draw.modify_image_bytes — consider deleting.
# image = Image.open(SCREENSHOT_PATH)
# draw = ImageDraw.Draw(image)
# # set counter for limiting the number of translations
# translated_number = 0
# bounding_boxes = []
# for i, (position,words,confidence) in enumerate(ocr_output):
# if translated_number >= MAX_TRANSLATE:
# break
# # try:
# top_left, _, _, _ = position
# position = (top_left[0], top_left[1] - 60)
# text_content = f"{translation[i]}\n{romanize(words)}\n{words}"
# lines = text_content.split('\n')
# x,y = position
# max_width = 0
# total_height = 0
# line_spacing = 3
# line_height = FONT_SIZE
# for line in lines:
# bbox = draw.textbbox(position, line, font=font)
# line_width, _ = bbox[2] - bbox[0], bbox[3] - bbox[1]
# max_width = max(max_width, line_width)
# total_height += line_height + line_spacing
# bounding_box = (x, y, x + max_width, y + total_height, words)
# print(f"Bounding Box of Interest: {bounding_box}")
# y = np.max([y,0])
# if len(bounding_boxes) > 0:
# for box in bounding_boxes:
# print(f'Investigating box: {box}')
# if intercepts((box[0],box[2]),(bounding_box[0],bounding_box[2])) and intercepts((box[1],box[3]),(y, y+total_height)):
# print(f'Overlapping change adjustment to {words}')
# y = np.max([y,box[3]]) + line_spacing
# print(y, box[3])
# print(f'Changed to {(x,y, x+max_width, y+total_height, words)}')
# adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
# bounding_boxes.append(adjusted_bounding_box)
# draw.rectangle([(x,y), (x+max_width, y+total_height)], outline="black", width=1)
# position = (x,y)
# for line in lines:
# draw.text(position, line, fill= TEXT_COLOR, font=font)
# y += FONT_SIZE + line_spacing
# position = (x,y)
# print("Adjusted_bounding_box:",adjusted_bounding_box)
# print('\n')
# translated_number += 1