###################################################################################
# Entry point of the on-screen translation loop.
#
# Each cycle grabs REGION via printsc(), runs OCR on it, and -- only when the set
# of detected words differs from the previous cycle -- sends the text to the
# translation backend and stores the re-rendered result in `latest_image`.
###################################################################################
##### IMPORT LIBRARIES #####
import os
import time
import sys

# The project modules imported below are resolved from ./helpers, so that
# directory has to be on sys.path before those imports execute.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))

from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image
from ocr import get_words, init_OCR, id_keep_source_lang
from logging_config import logger
from draw import modify_image_bytes
from config import SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY, REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL
###################################################################################

# Result of the most recent translation pass; rebound by main() whenever the
# detected words change.
# NOTE(review): presumably read by the web front end mentioned in the TODO
# list below -- confirm against the consumer.
latest_image = None


def main():
    """Run the capture -> OCR -> translate cycle forever.

    Initialises the OCR engine and the API translation backend once, then
    loops: capture REGION, keep the OCR results for SOURCE_LANG, and compare
    the detected word set with the previous cycle's. Only on a change is the
    text (at most MAX_TRANSLATE entries) translated and the module-level
    ``latest_image`` rebound. Every cycle ends with a sleep of INTERVAL
    seconds. The function never returns.
    """
    global latest_image

    ##### Initialize the OCR #####
    ocr_languages = [SOURCE_LANG, TARGET_LANG, 'en']
    ocr = init_OCR(model=OCR_MODEL, easy_languages=ocr_languages, use_GPU=OCR_USE_GPU)

    ##### Initialize the translation #####
    # Alternative sequence-model backend, currently disabled:
    # model, tokenizer = init_Seq_LLM(TRANSLATION_MODEL, from_lang =SOURCE_LANG , target_lang = TARGET_LANG)
    models = init_API_LLM(TRANSLATION_MODEL)

    ###################################################################################
    prev_words = set()  # nothing has been seen before the first cycle
    runs = 0
    while True:
        screenshot = printsc(REGION)
        image_bytes = convert_image_to_bytes(screenshot)
        # keep only phrases containing the source language
        detections = id_keep_source_lang(ocr, image_bytes, SOURCE_LANG)

        logger.info('Initial run' if runs == 0 else f'Run number: {runs}.')
        runs += 1

        curr_words = set(get_words(detections))

        # Translate only when the OCR detects different words, so the output
        # is not refreshed constantly and GPU power is saved.
        if curr_words == prev_words:
            logger.info("No new words to translate. Output will not refresh.")
        else:
            logger.info('Translating')
            detected_texts = [entry[1] for entry in detections]
            to_translate = detected_texts[:MAX_TRANSLATE]
            # Disabled sequence-model call, kept for reference:
            # translation = translate_Seq_LLM(to_translate, model_type = TRANSLATION_MODEL, model = model, tokenizer = tokenizer, from_lang = SOURCE_LANG, target_lang = TARGET_LANG)
            translation = translate_API_LLM(
                to_translate,
                TRANSLATION_MODEL,
                models,
                from_lang=SOURCE_LANG,
                target_lang=TARGET_LANG,
            )
            logger.info(f'Translation from {to_translate} to\n {translation}')
            rendered_bytes = modify_image_bytes(image_bytes, detections, translation)
            latest_image = bytes_to_image(rendered_bytes)
            # latest_image.show() # for debugging
            prev_words = curr_words

        logger.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)


################### TODO ##################
# 3. Quantising/finetuning larger LLMs. Consider using Aya-23-8B, Gemma, llama3.2 models.
# 5. Maybe refreshing issue of flask app. Also get webpage to update only if the image changes.
# Create a way for it to just replace the text and provide only the translation on-screen. Qt6

if __name__ == "__main__":
    main()