###################################################################################

##### IMPORT LIBRARIES #####

import os, time, logging, ast

from helpers.translation import init_TRANSLATE, translate
from helpers.utils import intercepts, contains_lang, printsc, romanize, convert_image_to_bytes, bytes_to_image
from helpers.ocr import id_filtered, id_lang, get_words, get_positions, get_confidences, init_OCR
from logging_config import setup_logger
from helpers.draw import modify_image_bytes

###################################################################################


#### LOGGING ####

setup_logger('chinese_to_eng', log_file='chinese_to_eng.log')


###################################################################################

##### Variables to edit #####

INTERVAL = int(os.getenv('INTERVAL', '5'))  # seconds between passes; the fallback of 5 is an assumed default

### available languages: 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'
SOURCE_LANG = os.getenv('SOURCE_LANG', 'ja')
TARGET_LANG = os.getenv('TARGET_LANG', 'en')

### Translation
TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'opus')  # 'opus' or 'm2m'; opus is much more lightweight
MAX_TRANSLATE = 200  # maximum number of OCR results translated per pass

### OCR
OCR_USE_GPU = ast.literal_eval(os.getenv('OCR_USE_GPU', 'True'))
OCR_MODEL = os.getenv('OCR_MODEL', 'easy')  # 'easy', 'paddle', 'rapid': easy is the most accurate, paddle is the fastest with CUDA, rapid is the fastest on CPU

REGION = ast.literal_eval(os.getenv('REGION', '(0,0,2560,1440)'))  # region of the screen to capture

###################################################################################
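
# Example configuration (illustrative values only; variables left unset fall back to the
# defaults above, and the script filename below is an assumption):
#   INTERVAL=5 SOURCE_LANG=ja TARGET_LANG=en TRANSLATION_MODEL=opus OCR_MODEL=easy \
#   OCR_USE_GPU=True REGION="(0,0,1920,1080)" python chinese_to_eng.py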


OCR_LANGUAGES = [SOURCE_LANG, TARGET_LANG, 'en']

latest_image = None  # holds the most recently translated image


def main():
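    """Capture the configured screen region, OCR it, translate any new text, and keep the
    rendered result in the module-level `latest_image`, repeating every INTERVAL seconds."""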
    global latest_image

    # initial screenshot of the configured region
    untranslated_image = printsc(REGION)
    byte_image = convert_image_to_bytes(untranslated_image)

    ###################################################################################
    ##### Initialize the OCR #####
    ocr = init_OCR(model=OCR_MODEL, ocr_languages=OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
    ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
    curr_words = set(get_words(ocr_output))
    prev_words = set()

    ##### Initialize the translation #####
    init_TRANSLATE()
    ###################################################################################

    while True:
        print('Running')
        if prev_words != curr_words:
            # the on-screen text changed, so translate it and redraw the overlay
            print('Translating')
            to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
            translation = translate(to_translate, SOURCE_LANG, TARGET_LANG)
            print(translation)

            translated_image = modify_image_bytes(byte_image, ocr_output, translation)
            latest_image = bytes_to_image(translated_image)

            prev_words = curr_words
            logging.info(f"Successfully translated image. Prev words are:\n{prev_words}")
        else:
            logging.info("The image has remained the same.")

        # torch.cuda.empty_cache()
        logging.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)

        # take a fresh screenshot and re-run OCR for the next pass
        untranslated_image = printsc(REGION)
        byte_image = convert_image_to_bytes(untranslated_image)
        ocr_output = id_lang(ocr, byte_image, SOURCE_LANG)
        curr_words = set(get_words(ocr_output))
        logging.info(f'Curr words to translate are:\n{curr_words}')


if __name__ == "__main__":
    main()


###################################################################################
# Legacy drawing logic, kept for reference; rendering now appears to be handled by
# helpers.draw.modify_image_bytes. This block relies on names (Image, ImageDraw, np,
# font, FONT_SIZE, TEXT_COLOR, SCREENSHOT_PATH) that are no longer defined in this file.
###################################################################################

# image = Image.open(SCREENSHOT_PATH)
# draw = ImageDraw.Draw(image)
#
# # set counter for limiting the number of translations
# translated_number = 0
# bounding_boxes = []
#
# for i, (position, words, confidence) in enumerate(ocr_output):
#     if translated_number >= MAX_TRANSLATE:
#         break
#
#     top_left, _, _, _ = position
#     position = (top_left[0], top_left[1] - 60)
#     text_content = f"{translation[i]}\n{romanize(words)}\n{words}"
#     lines = text_content.split('\n')
#     x, y = position
#
#     max_width = 0
#     total_height = 0
#     line_spacing = 3
#     line_height = FONT_SIZE
#
#     for line in lines:
#         bbox = draw.textbbox(position, line, font=font)
#         line_width, _ = bbox[2] - bbox[0], bbox[3] - bbox[1]
#         max_width = max(max_width, line_width)
#         total_height += line_height + line_spacing
#
#     bounding_box = (x, y, x + max_width, y + total_height, words)
#     print(f"Bounding Box of Interest: {bounding_box}")
#
#     y = np.max([y, 0])
#     if len(bounding_boxes) > 0:
#         for box in bounding_boxes:
#             print(f'Investigating box: {box}')
#             if intercepts((box[0], box[2]), (bounding_box[0], bounding_box[2])) and intercepts((box[1], box[3]), (y, y + total_height)):
#                 print(f'Overlapping change adjustment to {words}')
#                 y = np.max([y, box[3]]) + line_spacing
#                 print(y, box[3])
#                 print(f'Changed to {(x, y, x + max_width, y + total_height, words)}')
#
#     adjusted_bounding_box = (x, y, x + max_width, y + total_height, words)
#     bounding_boxes.append(adjusted_bounding_box)
#     draw.rectangle([(x, y), (x + max_width, y + total_height)], outline="black", width=1)
#
#     position = (x, y)
#     for line in lines:
#         draw.text(position, line, fill=TEXT_COLOR, font=font)
#         y += FONT_SIZE + line_spacing
#         position = (x, y)
#
#     print("Adjusted_bounding_box:", adjusted_bounding_box)
#     print('\n')
#     translated_number += 1
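
# The overlap check above is what stacks a new text box below any box it collides with.
# A minimal self-contained sketch of that idea (hypothetical helpers `_intervals_overlap`
# and `_boxes_overlap`, shown only for illustration; the project itself uses
# helpers.utils.intercepts, whose exact behaviour is not reproduced here):
#
# def _intervals_overlap(a, b):
#     """True if the 1-D closed intervals a = (a0, a1) and b = (b0, b1) overlap."""
#     return min(a) <= max(b) and min(b) <= max(a)
#
# def _boxes_overlap(box_a, box_b):
#     """True if two (left, top, right, bottom) boxes overlap on both axes."""
#     return (_intervals_overlap((box_a[0], box_a[2]), (box_b[0], box_b[2]))
#             and _intervals_overlap((box_a[1], box_a[3]), (box_b[1], box_b[3])))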