83 lines
3.4 KiB
Python
83 lines
3.4 KiB
Python
###################################################################################
|
|
##### IMPORT LIBRARIES #####
|
|
import os, time, sys
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
|
|
|
|
from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
|
|
from utils import printsc, convert_image_to_bytes, bytes_to_image
|
|
from ocr import get_words, init_OCR, id_keep_source_lang
|
|
from logging_config import logger
|
|
from draw import modify_image_bytes
|
|
from config import ADD_OVERLAY, SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY, REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL
|
|
###################################################################################
|
|
|
|
# Rebinds the ADD_OVERLAY name imported from config above, so the overlay
# branches in main() are disabled regardless of the configured value.
ADD_OVERLAY = False
|
|
|
|
# Most recent translated frame. main() rebinds this (via `global`) to the
# result of bytes_to_image() after each translation pass; it stays None
# until the first pass completes.
latest_image = None
|
|
|
|
def main():
    """Run the capture -> OCR -> translate loop until the process is stopped.

    The OCR engine and the API-backed translation models are initialised
    once. Each iteration then:

    1. captures ``REGION`` with ``printsc``;
    2. runs OCR on the capture, keeping only phrases containing the source
       language;
    3. translates and redraws the frame only when the set of detected words
       differs from the previous iteration;
    4. sleeps for ``INTERVAL`` seconds.

    The newest rendered frame is published through the module-level
    ``latest_image``. The loop has no exit condition, so this function does
    not return normally.
    """
    global latest_image

    ##### Initialize the OCR #####
    ocr_languages = [SOURCE_LANG, TARGET_LANG, 'en']
    ocr = init_OCR(model=OCR_MODEL, easy_languages=ocr_languages, use_GPU=OCR_USE_GPU)

    ##### Initialize the translation #####
    # Local-model alternative (currently unused):
    #   model, tokenizer = init_Seq_LLM(TRANSLATION_MODEL, from_lang=SOURCE_LANG, target_lang=TARGET_LANG)
    models = init_API_LLM(SOURCE_LANG, TARGET_LANG)

    runs = 0
    # Words seen on the previous iteration; empty before the first capture.
    prev_words = set()

    # NOTE: the original called `app.exec()` at this point. No `app` object is
    # defined or imported in this module, so that call raised NameError before
    # the loop could start. It has been removed; if a GUI event loop is ever
    # reinstated it must be guarded by ADD_OVERLAY and must not block the loop.

    while True:
        # NOTE(review): `overlay` is not defined in this module either. The two
        # ADD_OVERLAY branches below are only reachable if ADD_OVERLAY is truthy.
        if ADD_OVERLAY:
            # Presumably hides the overlay text so it is not part of the
            # capture below -- confirm against the overlay implementation.
            overlay.clear_all_text()

        untranslated_image = printsc(REGION)

        if ADD_OVERLAY:
            overlay.text_entries = overlay.text_entries_copy
            overlay.update()
            overlay.text_entries.clear()

        byte_image = convert_image_to_bytes(untranslated_image)
        # Keep only phrases containing the source language.
        ocr_output = id_keep_source_lang(ocr, byte_image, SOURCE_LANG)

        if runs == 0:
            logger.info('Initial run')
        else:
            logger.info(f'Run number: {runs}.')
        runs += 1

        curr_words = set(get_words(ocr_output))

        # Translate only when the detected words changed, so the output is not
        # refreshed constantly and no translation work is spent on an
        # unchanged screen.
        if prev_words != curr_words:
            logger.info('Translating')

            # NOTE(review): only the first MAX_TRANSLATE phrases are translated,
            # but the full ocr_output is passed to modify_image_bytes -- confirm
            # it tolerates fewer translations than OCR entries.
            to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
            # Local-model alternative (currently unused):
            #   translation = translate_Seq_LLM(to_translate, model_type=TRANSLATION_MODEL, model=model,
            #                                   tokenizer=tokenizer, from_lang=SOURCE_LANG, target_lang=TARGET_LANG)
            translation = translate_API_LLM(to_translate, models)
            logger.info(f'Translation from {to_translate} to\n {translation}')

            translated_image = modify_image_bytes(byte_image, ocr_output, translation)
            latest_image = bytes_to_image(translated_image)
            # latest_image.show()  # for debugging

            prev_words = curr_words
        else:
            logger.info("No new words to translate. Output will not refresh.")

        logger.info(f'Sleeping for {INTERVAL} seconds')
        time.sleep(INTERVAL)
|
|
# if ADD_OVERLAY:
|
|
# sys.exit(app.exec())
|
|
|
|
################### TODO ##################
|
|
# 3. Quantising/finetuning larger LLMs. Consider using Aya-23-8B, Gemma, llama3.2 models.
|
|
# 5. Investigate a possible refresh issue in the Flask app. Also make the webpage update only when the image changes.
|
|
# Create a way for it to just replace the text and provide only the translation on-screen. Qt6
|
|
|
|
# Script entry point: run the translation loop and hand whatever main()
# returns to the interpreter as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
|
|