# onscreen-translator/qt_app.py  (file-listing metadata: 149 lines, 6.1 KiB, Python)
import config, asyncio, sys, os, time, numpy as np, qt_app, web_app
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))
from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image, check_similarity, is_wayland
from ocr import get_words, init_OCR, id_keep_source_lang
from data import Base, engine, create_tables
from draw import modify_image
from config import (SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY,
REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL,
IMAGE_CHANGE_THRESHOLD, TEMP_IMG_PATH)
from logging_config import logger
from PySide6.QtWidgets import QMainWindow, QLabel, QVBoxLayout, QWidget, QApplication
from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QPixmap, QImage
class MainWindow(QMainWindow):
    """Top-level window that displays translated screen captures produced by
    the background :class:`ImageGenerator` worker thread."""

    def __init__(self):
        super().__init__()
        self.setWindowTitle("Translator")
        # Central widget holding a single image label.
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        layout = QVBoxLayout(main_widget)
        # Label that displays the latest translated frame.
        self.image_label = QLabel()
        layout.addWidget(self.image_label)
        # Start the capture/OCR/translate worker. Reference the class defined
        # in this module directly rather than via the self-import
        # (`qt_app.ImageGenerator`), which resolves through a duplicate module
        # object when the file is run as a script.
        self.generator = ImageGenerator()
        self.generator.image_ready.connect(self.update_image)
        self.generator.start()
        # Size the window to the captured region; REGION is (left, top, right, bottom).
        window_width = REGION[2] - REGION[0]
        window_height = REGION[3] - REGION[1]  # was misnamed `width_height`
        self.resize(window_width, window_height)

    def update_image(self, image_buffer):
        """Update the displayed image directly from buffer bytes.

        Parameters
        ----------
        image_buffer : bytes-like or None
            Encoded image data; ``None`` means the worker has not produced a
            translated frame yet, in which case the display is left unchanged.
        """
        if image_buffer is None:
            return
        # Decode the buffer; bail out (with a log entry) on malformed data.
        q_image = QImage.fromData(image_buffer)
        if q_image.isNull():
            logger.error("Failed to create QImage from buffer")
            return
        pixmap = QPixmap.fromImage(q_image)
        # Scale the pixmap to fit the label while maintaining aspect ratio.
        scaled_pixmap = pixmap.scaled(
            self.image_label.size(),
            Qt.KeepAspectRatio,
            Qt.SmoothTransformation,
        )
        self.image_label.setPixmap(scaled_pixmap)
class ImageGenerator(QThread):
    """Worker thread that repeatedly captures the screen region, OCRs it,
    translates the detected text, and emits a rendered overlay image.

    Emits ``image_ready`` once per cycle with the latest translated image
    (``None`` until the first translation completes).
    """

    # NOTE(review): declared as np.ndarray, but the payload is whatever
    # modify_image() returns, and the consumer passes it to QImage.fromData(),
    # which expects bytes-like data — confirm the declared type matches.
    image_ready = Signal(np.ndarray)

    def __init__(self):
        super().__init__()
        # Take an initial screenshot so OCR initialisation has an image to read.
        printsc(REGION, TEMP_IMG_PATH)
        self.running = True
        self.OCR_LANGUAGES = [SOURCE_LANG, 'en']
        self.ocr = init_OCR(model=OCR_MODEL, paddle_lang=SOURCE_LANG,
                            easy_languages=self.OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
        # Keep only phrases containing the source language.
        self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
        self.models = init_API_LLM(SOURCE_LANG, TARGET_LANG)
        self.runs = 0
        self.prev_words = set()
        self.curr_words = set(get_words(self.ocr_output))
        # Last rendered frame; None before the first successful translation.
        self.translated_image = None

    def run(self):
        # QThread entry point: host the async capture/translate loop.
        asyncio.run(self.async_run())

    async def async_run(self):
        """Main loop: capture -> OCR -> (maybe) translate -> emit image."""
        while self.running:
            logger.debug("Capturing screen")
            printsc(REGION, TEMP_IMG_PATH)
            logger.debug("Screen Captured. Proceeding to perform OCR.")
            # Keep only phrases containing the source language.
            self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
            logger.debug(f"OCR completed. Detected {len(self.ocr_output)} phrases.")
            if self.runs == 0:
                logger.info('Initial run')
                self.prev_words = set()
            else:
                logger.debug(f'Run number: {self.runs}.')
            self.runs += 1
            self.curr_words = set(get_words(self.ocr_output))
            logger.debug(f'Current words: {self.curr_words} Previous words: {self.prev_words}')
            # Only translate when the OCR output changed meaningfully -> avoids
            # re-translating a static screen and saves GPU power.
            if self.prev_words != self.curr_words and not check_similarity(
                    list(self.curr_words), list(self.prev_words),
                    threshold=IMAGE_CHANGE_THRESHOLD, method="tfidf"):
                logger.info('Beginning Translation')
                to_translate = [entry[1] for entry in self.ocr_output][:MAX_TRANSLATE]
                try:
                    translation = await translate_API_LLM(to_translate, self.models, call_size=3)
                except TypeError as e:
                    logger.error(f"Failed to translate using API models. Error: {e}. Sleeping for {2*INTERVAL} seconds.")
                    # await rather than time.sleep(): a blocking sleep would
                    # stall the event loop this coroutine runs on.
                    await asyncio.sleep(2 * INTERVAL)
                    continue
                logger.debug('Translation complete. Modifying image.')
                self.translated_image = modify_image(TEMP_IMG_PATH, self.ocr_output, translation)
                logger.debug("Image modified. Saving image.")
                self.prev_words = self.curr_words
            else:
                logger.info(f"Skipping translation. No significant change in the screen detected. Total translation attempts so far: {self.runs}.")
            logger.debug("Continuing to next iteration.")
            # Non-blocking pause between capture cycles (was time.sleep).
            await asyncio.sleep(INTERVAL)
            self.image_ready.emit(self.translated_image)

    def stop(self):
        """Request loop exit and block until the thread terminates."""
        self.running = False
        self.wait()
def closeEvent(self, event):
    """Clean up when closing the window"""
    # NOTE(review): this handler reads `self.generator`, which is only set on
    # MainWindow — in this flattened view it appears after ImageGenerator.stop,
    # where it would raise AttributeError. It presumably belongs on MainWindow;
    # confirm its intended class before relying on the shutdown path.
    # Stop the worker thread (sets running=False and waits), then let Qt close.
    self.generator.stop()
    event.accept()
def qt_app_main():
    """Launch the Qt translator application and block until it exits."""
    application = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    # Enter the Qt event loop; propagate its status as the process exit code.
    exit_code = application.exec()
    sys.exit(exit_code)
if __name__ == "__main__":
    # Launch the Qt front-end when this file is executed as a script.
    qt_app_main()