Bug with OPUS model
This commit is contained in:
parent
ecc264cf65
commit
66bc8f205c
13
README.md
13
README.md
@ -1,10 +1,10 @@
|
||||
## What does this do?
|
||||
|
||||
It provides translations from a source language to another language of a specified region on your screen while also providing necessary romanisation (including pinyin and furigana) to provide a guide to pronunciation. The main goal of this is primarily for people that have a low/basic level of understanding of a language to further develop that language by giving users a tool that lets them immerse themselves in native content. Main uses of this include but are not limited to: playing games and watching videos with subtitles in another language (although technically it might just be better to obtain an audio transcription, translate and replace the subtitles if possible -- however this is not always feasible if watching many episodes and/or you are watching videos spontaneously).
|
||||
It continuously provides translations from a source language to another language of a specified region on your screen while also (optionally) providing romanisation (including pinyin and furigana) to provide a guide to pronunciation. The main goal of this is primarily for people that have a low/basic level of understanding of a language to further develop that language by giving users a tool that lets them immerse themselves in native content. Main uses of this include but are not limited to: playing games and watching videos with subtitles in another language (although technically it might just be better to obtain an audio transcription, translate and replace the subtitles if possible -- however this is not always feasible if watching many episodes and/or you are watching videos spontaneously).
|
||||
|
||||
## Limitations
|
||||
|
||||
If the `learn` mode is enabled for the app, the added translations and romanisation naturally result in texts taking up three times the space, and therefore this is less suitable for texts that contain tightly packed words. You can optionally change the config to insert smaller text or change the overall font size of your screen so there is less text. A pure translation mode also exists, although if it is intended for web browsing, Google itself provides a more reliable method of translation which does not rely on the computationally heavy optical character recognition (OCR).
|
||||
If the `learn` mode is enabled for the app, the added translations and romanisation naturally result in texts taking up three times the space, and therefore this is less suitable for texts that contain tightly packed words. You can optionally change the config to insert smaller text or change the overall font size of your screen so there is less text. A pure translation mode also exists, although if it is intended for web browsing, Google itself provides a more reliable method of translation which does not rely on the computationally heavy optical character recognition (OCR).
|
||||
|
||||
## Usage (draft)
|
||||
|
||||
@ -54,13 +54,12 @@ Screenshotting is limited in Wayland, and `grim` is one of the more lightweight
|
||||
## TODO:
|
||||
|
||||
- Create an overlay window that works in Wayland.
|
||||
- Make use of the translation data -> maybe make a personalised game that uses
|
||||
- Make use of the translation data -> maybe make a personalised game that uses the data.
|
||||
- Provide the option to simplify and automate most of the install process.
|
||||
|
||||
## Terms of Use
|
||||
# Terms of Use
|
||||
|
||||
By using this application, you agree to the following terms and conditions.
|
||||
|
||||
# Data Collection and External API Use
|
||||
## Data Collection and External API Use
|
||||
|
||||
1.1 Onscreen Data Transmission: The application is designed to send data displayed on your screen, including potentially sensitive or personal information, to an external API if local processing is not set up.
|
||||
|
||||
|
||||
@ -435,11 +435,13 @@ def generate_text(
|
||||
return all_generated_texts
|
||||
|
||||
if __name__ == '__main__':
|
||||
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
|
||||
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
|
||||
groq = Groqq('gemma-7b-it', 15, GROQ_API_KEY)
|
||||
groq.set_lang('zh','en')
|
||||
gemini = Gemini('gemini-1.5-pro', 15, GEMINI_API_KEY)
|
||||
gemini.set_lang('zh','en')
|
||||
print(gemini.translate(['荷兰咯']))
|
||||
print(groq.translate(['荷兰咯']))
|
||||
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, M2M100Tokenizer, M2M100ForConditionalGeneration
|
||||
opus_model = 'Helsinki-NLP/opus-mt-en-zh'
|
||||
LOCAL_FILES_ONLY = True
|
||||
tokenizer = AutoTokenizer.from_pretrained(opus_model, local_files_only=LOCAL_FILES_ONLY)
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(opus_model, local_files_only=LOCAL_FILES_ONLY, torch_dtype=torch.float16).to(device)
|
||||
# tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M", local_files_only=LOCAL_FILES_ONLY)
|
||||
# tokenizer.src_lang = "en"
|
||||
# model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M", local_files_only=LOCAL_FILES_ONLY).to(device)
|
||||
|
||||
print(generate_text([ i.lower().capitalize() for i in ['placeholder','Story','StoRY', 'TufoRIaL', 'CovFfG', 'LoaD DaTA', 'SAME DATa', 'ReTulN@TitIE', 'View', '@niirm', 'SysceM', 'MeNu:', 'MaND', 'CoM', 'SeLEcT', 'Frogguingang', 'Tutorias', 'Back']], model, tokenizer))
|
||||
@ -87,11 +87,12 @@ def _id_filtered(ocr, image, lang) -> list:
|
||||
def _id_lang(ocr, image, lang) -> list:
|
||||
result = _identify(ocr, image)
|
||||
lang = standardize_lang(lang)['id_model_lang']
|
||||
try:
|
||||
filtered = [entry for entry in result if contains_lang(entry[1], lang)]
|
||||
except:
|
||||
logger.error(f"Selected language not part of default: {default_languages}.")
|
||||
raise ValueError(f"Selected language not part of default: {default_languages}.")
|
||||
# try:
|
||||
logger.info(f"Filtering out phrases not in {lang}.")
|
||||
filtered = [entry for entry in result if contains_lang(entry[1], lang)]
|
||||
# except:
|
||||
# logger.error(f"Selected language not part of default: {default_languages}.")
|
||||
# raise ValueError(f"Selected language not part of default: {default_languages}.")
|
||||
return filtered
|
||||
|
||||
def id_keep_source_lang(ocr, image, lang) -> list:
|
||||
|
||||
@ -143,15 +143,17 @@ def get_OPUS_model(from_lang, target_lang):
|
||||
|
||||
def init_OPUS(from_lang = 'ch_sim', target_lang = 'en'):
|
||||
opus_model = get_OPUS_model(from_lang, target_lang)
|
||||
logger.debug(f"OPUS model: {opus_model}")
|
||||
tokenizer = AutoTokenizer.from_pretrained(opus_model, local_files_only=LOCAL_FILES_ONLY)
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(opus_model, local_files_only=LOCAL_FILES_ONLY, torch_dtype=torch.float16).to(device)
|
||||
model.eval()
|
||||
return (model, tokenizer)
|
||||
|
||||
def translate_OPUS(text: list[str], model, tokenizer) -> list[str]:
|
||||
translated_text = generate_text(model,tokenizer, text,
|
||||
translated_text = generate_text(text, model,tokenizer,
|
||||
batch_size=BATCH_SIZE, device=device,
|
||||
max_length=MAX_INPUT_TOKENS, max_new_tokens=MAX_OUTPUT_TOKENS)
|
||||
logger.debug(f"Translated text: {translated_text}")
|
||||
return translated_text
|
||||
|
||||
###############################
|
||||
@ -174,6 +176,7 @@ def translate_Seq_LLM(text,
|
||||
model,
|
||||
tokenizer,
|
||||
**kwargs):
|
||||
text = [t.lower().capitalize() for t in text]
|
||||
if model_type == 'opus':
|
||||
return translate_OPUS(text, model, tokenizer)
|
||||
elif model_type == 'm2m':
|
||||
|
||||
@ -28,7 +28,7 @@ def is_wayland():
|
||||
|
||||
# please install grim otherwise this is way too slow for wayland
|
||||
def printsc_wayland(region: tuple, path: str):
|
||||
subprocess.run(['grim','-g', f'{region[0]},{region[1]} {region[2]-region[0]}x{region[3]-region[1]}', '-t', 'jpeg', '-q','100', path])
|
||||
subprocess.run(['grim','-g', f'{region[0]},{region[1]} {region[2]-region[0]}x{region[3]-region[1]}', '-t', 'jpeg', '-q','90', path])
|
||||
|
||||
def printsc_non_wayland(region: tuple, path: str):
|
||||
# use mss to capture the screen
|
||||
|
||||
@ -64,5 +64,5 @@ def setup_logger(
|
||||
print(f"Failed to setup logger: {e}")
|
||||
return None
|
||||
|
||||
logger = setup_logger('on_screen_translator', log_file='translate.log', level=logging.DEBUG)
|
||||
logger = setup_logger('on_screen_translator', log_file='translate.log', level=logging.INFO)
|
||||
|
||||
|
||||
@ -71,11 +71,13 @@ class ImageGenerator(QThread):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
printsc(REGION, TEMP_IMG_PATH)
|
||||
self.running = True
|
||||
self.OCR_LANGUAGES = [SOURCE_LANG, 'en']
|
||||
self.ocr = init_OCR(model=OCR_MODEL, paddle_lang= SOURCE_LANG, easy_languages = self.OCR_LANGUAGES, use_GPU=OCR_USE_GPU)
|
||||
self.ocr_output = id_keep_source_lang(self.ocr, TEMP_IMG_PATH, SOURCE_LANG)
|
||||
self.models = init_API_LLM(SOURCE_LANG, TARGET_LANG)
|
||||
# self.model, self.tokenizer = init_Seq_LLM(TRANSLATION_MODEL, from_lang =SOURCE_LANG , target_lang = TARGET_LANG)
|
||||
self.runs = 0
|
||||
self.prev_words = set()
|
||||
self.curr_words = set(get_words(self.ocr_output))
|
||||
@ -106,7 +108,7 @@ class ImageGenerator(QThread):
|
||||
logger.info('Beginning Translation')
|
||||
|
||||
to_translate = [entry[1] for entry in self.ocr_output][:MAX_TRANSLATE]
|
||||
# translation = translate_Seq_LLM(to_translate, model_type = TRANSLATION_MODEL, model = model, tokenizer = tokenizer, from_lang = SOURCE_LANG, target_lang = TARGET_LANG)
|
||||
# translation = translate_Seq_LLM(to_translate, model_type = TRANSLATION_MODEL, model = self.model, tokenizer = self.tokenizer, from_lang = SOURCE_LANG, target_lang = TARGET_LANG)
|
||||
try:
|
||||
translation = await translate_API_LLM(to_translate, self.models, call_size = 3)
|
||||
except TypeError as e:
|
||||
|
||||
Loading…
Reference in New Issue
Block a user