###################################################################################
##### IMPORT LIBRARIES #####
import os, time, sys
# The helper modules live in ./helpers, which must be on sys.path before the
# project-local imports below are resolved.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image
from ocr import get_words, init_OCR, id_keep_source_lang
from logging_config import logger
from draw import modify_image_bytes
from config import ADD_OVERLAY, SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY, REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL, FONT_SIZE, FONT_FILE, FONT_COLOUR
from create_overlay import app, overlay
from typing import Optional, List
###################################################################################

from PySide6.QtCore import Qt, QPoint, QRect, QTimer, QThread, Signal
from PySide6.QtGui import (QKeySequence, QShortcut, QAction, QPainter, QFont,
                           QColor, QIcon, QImage, QPixmap)
from PySide6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
                               QLabel, QSystemTrayIcon, QMenu)
from dataclasses import dataclass


class TranslationThread(QThread):
    """Background worker that captures, OCRs and translates the screen region.

    Each cycle takes a screenshot of ``REGION``, runs OCR on it, and, when the
    set of detected words differs from the previous cycle, translates the text
    and emits ``translation_ready``. The worker then waits ``interval`` seconds
    before the next cycle. A failing cycle is logged and retried rather than
    terminating the thread.
    """

    translation_ready = Signal(list, list)  # Signal to send translation results
    start_capture = Signal()  # emitted immediately before the screenshot
    end_capture = Signal()  # emitted immediately after the screenshot
    screen_capture = Signal(int, int, int, int)  # declared here; not emitted by this class

    # Upper bound (seconds) on how long stop() can go unnoticed while sleeping.
    _STOP_POLL_SECONDS = 0.1

    def __init__(self, ocr, models, source_lang, target_lang, interval):
        """Store the OCR engine, translation models and loop settings.

        Args:
            ocr: OCR engine object passed to ``id_keep_source_lang``.
            models: Translation models passed to ``translate_API_LLM``.
            source_lang: Language passed to ``id_keep_source_lang``.
            target_lang: Target language; stored but not read by this class.
            interval: Seconds to wait between cycles.
        """
        super().__init__()
        self.ocr = ocr
        self.models = models
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.interval = interval
        self.running = True
        self.prev_words = set()
        self.runs = 0

    def run(self) -> None:
        """Run capture/translate cycles until ``stop()`` is called."""
        while self.running:
            try:
                self._run_once()
            except Exception:
                # Boundary of the worker thread: one failed capture, OCR or
                # translation call must not end the loop for the rest of the
                # session, so log it and retry on the next cycle.
                logger.exception('Translation cycle failed; retrying after the interval.')
            logger.info(f'Sleeping for {self.interval} seconds')
            self._sleep_interruptibly(self.interval)

    def _run_once(self) -> None:
        """Perform one capture -> OCR -> (optional) translate cycle."""
        untranslated_image = self._capture_region()
        byte_image = convert_image_to_bytes(untranslated_image)
        ocr_output = id_keep_source_lang(self.ocr, byte_image, self.source_lang)

        if self.runs == 0:
            logger.info('Initial run')
        else:
            logger.info(f'Run number: {self.runs}.')
        self.runs += 1

        curr_words = set(get_words(ocr_output))
        if self.prev_words == curr_words:
            logger.info("No new words to translate. Output will not refresh.")
            return

        logger.info('Translating')
        # NOTE(review): only the first MAX_TRANSLATE entries are translated,
        # but the full ocr_output is emitted below, so the two lists may
        # differ in length - confirm the receivers tolerate that.
        to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
        translation = translate_API_LLM(to_translate, self.models)
        logger.info(f'Translation from {to_translate} to\n {translation}')

        # Emit the translation results
        modify_image_bytes(byte_image, ocr_output, translation)
        self.translation_ready.emit(ocr_output, translation)
        # Only remember the words once they were translated successfully, so
        # a failed cycle is retried with the same text.
        self.prev_words = curr_words

    def _capture_region(self):
        """Take a screenshot of ``REGION`` bracketed by the capture signals."""
        # NOTE(review): emitted from the worker thread, so this is presumably
        # delivered to the GUI thread as a queued signal; the overlay may not
        # have reacted yet when the screenshot is taken - confirm.
        self.start_capture.emit()
        try:
            return printsc(REGION)
        finally:
            # Always pair start_capture with end_capture, even when the
            # screenshot fails.
            self.end_capture.emit()

    def _sleep_interruptibly(self, seconds) -> None:
        """Sleep up to ``seconds``, returning early once ``stop()`` is called."""
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(remaining, self._STOP_POLL_SECONDS))

    def stop(self) -> None:
        """Ask the loop in ``run()`` to exit at its next check."""
        self.running = False


def main() -> int:
    """Wire the translation worker to the overlay and run the Qt event loop.

    Returns:
        The exit code returned by ``app.exec()``.
    """
    # Initialize OCR
    ocr_languages = [SOURCE_LANG, TARGET_LANG, 'en']
    ocr = init_OCR(model=OCR_MODEL, easy_languages=ocr_languages, use_GPU=OCR_USE_GPU)

    # Initialize translation
    models = init_API_LLM(SOURCE_LANG, TARGET_LANG)

    # Create the translation thread
    translation_thread = TranslationThread(
        ocr=ocr,
        models=models,
        source_lang=SOURCE_LANG,
        target_lang=TARGET_LANG,
        interval=INTERVAL,
    )

    # Connect translation results to overlay update
    translation_thread.start_capture.connect(overlay.prepare_for_capture)
    translation_thread.end_capture.connect(overlay.restore_after_capture)
    translation_thread.translation_ready.connect(overlay.update_translation)
    translation_thread.screen_capture.connect(overlay.capture_behind)

    # Start the translation thread
    translation_thread.start()

    try:
        # Start Qt event loop
        return app.exec()
    finally:
        # Cleanup: stop and join the worker even if the event loop raised.
        translation_thread.stop()
        translation_thread.wait()


if __name__ == "__main__":
    sys.exit(main())