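"""Qt front end for the screen translator.

Captures the configured screen REGION at a fixed INTERVAL, runs OCR on the
capture, translates newly detected source-language text with the API LLM
models, and displays the re-rendered image in a window.
"""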
import asyncio
import os
import sys
import time

import numpy as np

import config
import qt_app
import web_app

# Make the helper modules (translation, utils, ocr, data, draw) importable.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'helpers'))

from translation import translate_Seq_LLM, translate_API_LLM, init_API_LLM, init_Seq_LLM
from utils import printsc, convert_image_to_bytes, bytes_to_image, check_similarity, is_wayland
from ocr import get_words, init_OCR, id_keep_source_lang
from data import Base, engine, create_tables
from draw import modify_image
from config import (SOURCE_LANG, TARGET_LANG, OCR_MODEL, OCR_USE_GPU, LOCAL_FILES_ONLY,
                    REGION, INTERVAL, MAX_TRANSLATE, TRANSLATION_MODEL,
                    IMAGE_CHANGE_THRESHOLD, TEMP_IMG_PATH)
from logging_config import logger

from PySide6.QtWidgets import QMainWindow, QLabel, QVBoxLayout, QWidget, QApplication
from PySide6.QtCore import Qt, QThread, Signal
from PySide6.QtGui import QPixmap, QImage


class MainWindow(QMainWindow):
    def __init__(self):
        super().__init__()
        self.setWindowTitle("Translator")

        # Create main widget and layout
        main_widget = QWidget()
        self.setCentralWidget(main_widget)
        layout = QVBoxLayout(main_widget)

        # Create image label
        self.image_label = QLabel()
        layout.addWidget(self.image_label)

        # Set up image generator thread
        self.generator = qt_app.ImageGenerator()
        self.generator.image_ready.connect(self.update_image)
        self.generator.start()

        # Set initial window size to match the captured region
        window_width, window_height = REGION[2] - REGION[0], REGION[3] - REGION[1]
        self.resize(window_width, window_height)

    def update_image(self, image_buffer):
        """Update the displayed image directly from buffer bytes"""
        if image_buffer is None:
            return

        # Convert buffer to QImage
        q_image = QImage.fromData(image_buffer)

        if q_image.isNull():
            logger.error("Failed to create QImage from buffer")
            return

        # Convert QImage to QPixmap, scale it to fit the label while
        # maintaining the aspect ratio, and display it
        pixmap = QPixmap.fromImage(q_image)
        scaled_pixmap = pixmap.scaled(
            self.image_label.size(),
            Qt.KeepAspectRatio,
            Qt.SmoothTransformation
        )
        self.image_label.setPixmap(scaled_pixmap)

    def closeEvent(self, event):
        """Clean up when closing the window"""
        self.generator.stop()
        event.accept()


class ImageGenerator(QThread):
    """Thread that continuously captures the screen, runs OCR, and emits translated images."""
    # The emitted payload is the encoded image buffer consumed by
    # MainWindow.update_image (or None before the first translation),
    # so declare a generic object signal.
    image_ready = Signal(object)

    def __init__(self):
        super().__init__()
        self.running = True
        self.OCR_LANGUAGES = [SOURCE_LANG, 'en']
        self.ocr = init_OCR(model=OCR_MODEL, paddle_lang=SOURCE_LANG,
                            easy_languages=self.OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
        # Initial OCR pass on whatever image is currently at TEMP_IMG_PATH,
        # used to seed curr_words before the capture loop starts.
        self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
        self.models = init_API_LLM(SOURCE_LANG, TARGET_LANG)
        self.runs = 0
        self.prev_words = set()
        self.curr_words = set(get_words(self.ocr_output))
        self.translated_image = None

    def run(self):
        asyncio.run(self.async_run())

    async def async_run(self):
        while self.running:
            logger.debug("Capturing screen")
            printsc(REGION, TEMP_IMG_PATH)
            logger.debug("Screen captured. Proceeding to perform OCR.")
            # Keep only phrases containing the source language
            self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
            logger.debug(f"OCR completed. Detected {len(self.ocr_output)} phrases.")

            if self.runs == 0:
                logger.info('Initial run')
                self.prev_words = set()
            else:
                logger.debug(f'Run number: {self.runs}.')
            self.runs += 1

            self.curr_words = set(get_words(self.ocr_output))
            logger.debug(f'Current words: {self.curr_words} Previous words: {self.prev_words}')

            # Only translate when the OCR output has changed significantly, so a
            # constantly refreshing screen does not waste GPU power and API calls.
            if self.prev_words != self.curr_words and not check_similarity(
                    list(self.curr_words), list(self.prev_words),
                    threshold=IMAGE_CHANGE_THRESHOLD, method="tfidf"):
                logger.info('Beginning translation')

                to_translate = [entry[1] for entry in self.ocr_output][:MAX_TRANSLATE]
                # translation = translate_Seq_LLM(to_translate, model_type=TRANSLATION_MODEL, model=model, tokenizer=tokenizer, from_lang=SOURCE_LANG, target_lang=TARGET_LANG)
                try:
                    translation = await translate_API_LLM(to_translate, self.models, call_size=3)
                except TypeError as e:
                    logger.error(f"Failed to translate using API models. Error: {e}. Sleeping for {2*INTERVAL} seconds.")
                    await asyncio.sleep(2 * INTERVAL)
                    continue

                logger.debug('Translation complete. Modifying image.')
                self.translated_image = modify_image(TEMP_IMG_PATH, self.ocr_output, translation)
                # view_buffer_app.show_buffer_image(translated_image, label)
                logger.debug("Image modified. Saving image.")
                self.prev_words = self.curr_words
            else:
                logger.info(f"Skipping translation. No significant change in the screen detected. Total runs so far: {self.runs}.")

            logger.debug("Continuing to next iteration.")
            await asyncio.sleep(INTERVAL)
            self.image_ready.emit(self.translated_image)

    def stop(self):
        self.running = False
        self.wait()


def qt_app_main():
    app = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    sys.exit(app.exec())


if __name__ == "__main__":
    qt_app_main()