"""Qt front-end for the live screen translator.

Captures a screen region on a background QThread, OCRs it, translates the
detected phrases via an API LLM, draws the translation onto the capture, and
displays the result in a QMainWindow.
"""
import config, asyncio, sys, os, time, numpy as np, qt_app, web_app
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image, check_similarity, is_wayland
from ocr import get_words, init_OCR, id_keep_source_lang
from data import Base, engine, create_tables
from draw import modify_image
from config import (SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY,
                    REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL,
                    IMAGE_CHANGE_THRESHOLD, TEMP_IMG_PATH)
from logging_config import logger
from PySide6.QtWidgets import QMainWindow, QLabel, QVBoxLayout, QWidget, QApplication
from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QPixmap, QImage


class MainWindow(QMainWindow):
    """Top-level window that shows the most recent translated capture."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Translator")

        # Create main widget and layout
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        layout = QVBoxLayout(main_widget)

        # Create image label
        self.image_label = QLabel()
        layout.addWidget(self.image_label)

        # Set up image generator thread
        self.generator = qt_app.ImageGenerator()
        self.generator.image_ready.connect(self.update_image)
        self.generator.start()

        # Set initial window size from the captured region.
        # FIX: second unpacking target was misnamed `width_height`.
        window_width = REGION[2] - REGION[0]
        window_height = REGION[3] - REGION[1]
        self.resize(window_width, window_height)

    def update_image(self, image_buffer):
        """Update the displayed image directly from buffer bytes.

        Args:
            image_buffer: encoded image bytes (or None on the first emissions,
                before any translation has been produced).
        """
        if image_buffer is None:
            return

        # Convert buffer to QImage
        q_image = QImage.fromData(image_buffer)
        if q_image.isNull():
            logger.error("Failed to create QImage from buffer")
            return

        # Convert QImage to QPixmap and display it, scaled to fit the label
        # while maintaining aspect ratio.
        pixmap = QPixmap.fromImage(q_image)
        scaled_pixmap = pixmap.scaled(
            self.image_label.size(),
            Qt.KeepAspectRatio,
            Qt.SmoothTransformation,
        )
        self.image_label.setPixmap(scaled_pixmap)

    def closeEvent(self, event):
        """Clean up when closing the window"""
        # NOTE(review): placed on MainWindow — `closeEvent` is a QWidget hook
        # and references `self.generator`, which only MainWindow defines; in
        # the (whitespace-mangled) original it appeared after ImageGenerator's
        # methods. Confirm against upstream history.
        self.generator.stop()
        event.accept()


class ImageGenerator(QThread):
    """Thread for generating images continuously"""

    # NOTE(review): payload is whatever modify_image() returns, consumed by
    # QImage.fromData as raw bytes — declared `object` rather than np.ndarray
    # so Qt never filters the emission on type. Confirm modify_image's return.
    image_ready = Signal(object)

    def __init__(self):
        super().__init__()
        self.running = True  # cleared by stop() to end the capture loop
        self.OCR_LANGUAGES = [SOURCE_LANG, 'en']
        self.ocr = init_OCR(model=OCR_MODEL, paddle_lang=SOURCE_LANG,
                            easy_languages=self.OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
        self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
        self.models = init_API_LLM(SOURCE_LANG, TARGET_LANG)
        self.runs = 0                  # completed loop iterations
        self.prev_words = set()        # words seen on the previous capture
        self.curr_words = set(get_words(self.ocr_output))
        self.translated_image = None   # last rendered result (None until first translation)

    def run(self):
        # QThread entry point: drive the capture/translate coroutine to completion.
        asyncio.run(self.async_run())

    async def async_run(self):
        """Capture → OCR → (maybe) translate → emit, forever, until stopped."""
        while self.running:
            logger.debug("Capturing screen")
            printsc(REGION, TEMP_IMG_PATH)
            logger.debug(f"Screen Captured. Proceeding to perform OCR.")
            # keep only phrases containing the source language
            self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
            logger.debug(f"OCR completed. Detected {len(self.ocr_output)} phrases.")

            if self.runs == 0:
                logger.info('Initial run')
                self.prev_words = set()
            else:
                logger.debug(f'Run number: {self.runs}.')
            # FIX: incremented unconditionally — if this sat inside the else
            # branch, runs could never leave 0 and every pass logged 'Initial run'.
            self.runs += 1

            self.curr_words = set(get_words(self.ocr_output))
            logger.debug(f'Current words: {self.curr_words} Previous words: {self.prev_words}')

            # If the OCR detects different words, translate the screen — this
            # avoids re-translating a static screen and saves GPU power.
            if self.prev_words != self.curr_words and not check_similarity(
                    list(self.curr_words), list(self.prev_words),
                    threshold=IMAGE_CHANGE_THRESHOLD, method="tfidf"):
                logger.info('Beginning Translation')
                to_translate = [entry[1] for entry in self.ocr_output][:MAX_TRANSLATE]
                try:
                    translation = await translate_API_LLM(to_translate, self.models, call_size=3)
                except TypeError as e:
                    logger.error(f"Failed to translate using API models. Error: {e}. Sleeping for {2*INTERVAL} seconds.")
                    # FIX: time.sleep() here would block the event loop.
                    await asyncio.sleep(2 * INTERVAL)
                    continue
                logger.debug('Translation complete. Modifying image.')
                self.translated_image = modify_image(TEMP_IMG_PATH, self.ocr_output, translation)
                logger.debug("Image modified. Saving image.")
                self.prev_words = self.curr_words
            else:
                logger.info(f"Skipping translation. No significant change in the screen detected. Total translation attempts so far: {self.runs}.")

            logger.debug("Continuing to next iteration.")
            # FIX: was time.sleep(INTERVAL) inside a coroutine — blocking call.
            await asyncio.sleep(INTERVAL)
            self.image_ready.emit(self.translated_image)

    def stop(self):
        """Ask the loop to exit, then block until the thread has finished."""
        self.running = False
        self.wait()


def qt_app_main():
    """Create the Qt application, show the main window, and run the event loop."""
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())


if __name__ == "__main__":
    qt_app_main()