# onscreen-translator/qtapp.py
#
# 115 lines
# 4.3 KiB
# Python

###################################################################################
##### IMPORT LIBRARIES #####
import os, time, sys
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image
from ocr import get_words, init_OCR, id_keep_source_lang
from logging_config import logger
from draw import modify_image_bytes
from config import ADD_OVERLAY, SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY, REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL, FONT_SIZE, FONT_FILE, FONT_COLOUR
from create_overlay import app, overlay
from typing import Optional, List
###################################################################################
from PySide6.QtCore import Qt, QPoint, QRect, QTimer, QThread, Signal
from PySide6.QtGui import (QKeySequence, QShortcut, QAction, QPainter, QFont,
QColor, QIcon, QImage, QPixmap)
from PySide6.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QLabel, QSystemTrayIcon, QMenu)
from dataclasses import dataclass
class TranslationThread(QThread):
    """Worker thread that repeatedly captures, OCRs and translates a screen region.

    Each cycle takes a screenshot of ``REGION`` with ``printsc``, runs
    ``id_keep_source_lang`` on it and, only when the set of words returned by
    ``get_words`` differs from the previous cycle, translates the text with
    ``translate_API_LLM`` and emits ``translation_ready``.
    """

    # Emitted with (ocr_output, translation) after a new translation is made.
    translation_ready = Signal(list, list)
    # Emitted immediately before the screenshot is taken.
    # NOTE(review): if receivers are connected through a queued connection
    # they may not have run before printsc() is called -- confirm.
    start_capture = Signal()
    # Emitted once the screenshot call has returned or raised.
    end_capture = Signal()
    # Declared for consumers; this class never emits it.
    screen_capture = Signal(int, int, int, int)

    # Longest single sleep slice, in seconds, so stop() is noticed promptly.
    _STOP_POLL_SECONDS = 0.1

    def __init__(self, ocr, models, source_lang, target_lang, interval):
        """Store the collaborators and reset the per-run state.

        Args:
            ocr: OCR object handed to ``id_keep_source_lang``.
            models: Translation models handed to ``translate_API_LLM``.
            source_lang: Language passed to ``id_keep_source_lang``.
            target_lang: Stored on the instance; not read by ``run``.
            interval: Seconds to wait between two cycles.
        """
        super().__init__()
        self.ocr = ocr
        self.models = models
        self.source_lang = source_lang
        self.target_lang = target_lang
        self.interval = interval
        self.running = True
        self.prev_words = set()
        self.runs = 0

    def run(self):
        """Run capture/translate cycles until ``stop`` clears ``self.running``."""
        while self.running:
            try:
                self._run_cycle()
            except Exception:
                # Boundary of the worker loop: a failing cycle (OCR error,
                # translation API error, ...) must not end the thread for
                # good while the application keeps running.
                logger.exception('Translation cycle failed; retrying after the interval.')
            logger.info(f'Sleeping for {self.interval} seconds')
            self._sleep_interruptibly(self.interval)

    def _run_cycle(self):
        """Capture the region once and emit a translation if the words changed."""
        self.start_capture.emit()
        try:
            untranslated_image = printsc(REGION)
        finally:
            # Always balance start_capture, even when the screenshot fails,
            # so connected slots are not left waiting for the end of capture.
            self.end_capture.emit()

        byte_image = convert_image_to_bytes(untranslated_image)
        ocr_output = id_keep_source_lang(self.ocr, byte_image, self.source_lang)

        if self.runs == 0:
            logger.info('Initial run')
        else:
            logger.info(f'Run number: {self.runs}.')
        self.runs += 1

        curr_words = set(get_words(ocr_output))
        if self.prev_words == curr_words:
            logger.info("No new words to translate. Output will not refresh.")
            return

        logger.info('Translating')
        # presumably entry[1] is the recognised text of an OCR entry -- verify
        # against the ocr helpers.
        # NOTE(review): only the first MAX_TRANSLATE texts are translated while
        # the full ocr_output is passed on below -- confirm consumers accept a
        # translation list shorter than ocr_output.
        to_translate = [entry[1] for entry in ocr_output][:MAX_TRANSLATE]
        translation = translate_API_LLM(to_translate, self.models)
        logger.info(f'Translation from {to_translate} to\n {translation}')
        # Any return value of modify_image_bytes is discarded here.
        modify_image_bytes(byte_image, ocr_output, translation)
        # Emit the translation results
        self.translation_ready.emit(ocr_output, translation)
        self.prev_words = curr_words

    def _sleep_interruptibly(self, seconds):
        """Sleep up to ``seconds``, returning early once ``running`` is cleared."""
        deadline = time.monotonic() + seconds
        while self.running:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            time.sleep(min(remaining, self._STOP_POLL_SECONDS))

    def stop(self):
        """Ask ``run`` to finish; it returns after the current cycle or sleep slice."""
        self.running = False
def main():
    """Build the OCR/translation worker, wire it to the overlay and run Qt.

    Returns:
        The value returned by ``app.exec()``.
    """
    # OCR is initialised with the source language, the target language and English.
    ocr_engine = init_OCR(
        model=OCR_MODEL,
        easy_languages=[SOURCE_LANG, TARGET_LANG, 'en'],
        use_GPU=OCR_USE_GPU,
    )
    llm_models = init_API_LLM(SOURCE_LANG, TARGET_LANG)

    worker = TranslationThread(
        ocr=ocr_engine,
        models=llm_models,
        source_lang=SOURCE_LANG,
        target_lang=TARGET_LANG,
        interval=INTERVAL,
    )

    # Each worker signal is routed to the overlay slot that handles it.
    wiring = (
        (worker.start_capture, overlay.prepare_for_capture),
        (worker.end_capture, overlay.restore_after_capture),
        (worker.translation_ready, overlay.update_translation),
        (worker.screen_capture, overlay.capture_behind),
    )
    for signal, slot in wiring:
        signal.connect(slot)

    worker.start()

    # Blocks until the Qt event loop exits.
    exit_status = app.exec()

    # Stop the worker and wait for its thread to finish before returning.
    worker.stop()
    worker.wait()
    return exit_status
# Script entry point: the return value of main() becomes the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())