onscreen-translator/helpers/ocr.py

from paddleocr import PaddleOCR
import easyocr
from typing import Optional
from rapidocr_onnxruntime import RapidOCR
import langid, sys,os
from utils import contains_lang, standardize_lang
from concurrent.futures import ThreadPoolExecutor
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from logging_config import logger
# PaddleOCR
# PaddleOCR supports Chinese, English, French, German, Korean and Japanese.
# Set the `lang` parameter to `ch`, `en`, `fr`, `german`, `korean` or `japan`
# (in the same order) to switch the language model.
# Initialisation only needs to run once: it downloads the model and loads it into memory.

# Language codes quoted in the error that `_id_lang` raises when its
# script-based filter cannot handle the requested language.
default_languages = ["en", "ch", "ja", "ko"]


def _paddle_init(paddle_lang, use_angle_cls=False, use_GPU=True, **kwargs):
    """Create a PaddleOCR engine for a single language.

    Args:
        paddle_lang: PaddleOCR language code (e.g. ``'ch'``, ``'en'``).
        use_angle_cls: Forwarded to ``PaddleOCR`` unchanged.
        use_GPU: Whether the engine should run on the GPU.
        **kwargs: Any further ``PaddleOCR`` constructor options.

    Returns:
        The constructed ``PaddleOCR`` instance.
    """
    # PaddleOCR 2.x spells its device flag `use_gpu`; an unknown keyword such
    # as `use_GPU` is accepted but ignored, so the flag never took effect.
    # setdefault lets an explicit `use_gpu` in kwargs win instead of raising a
    # duplicate-keyword TypeError.
    kwargs.setdefault('use_gpu', use_GPU)
    return PaddleOCR(use_angle_cls=use_angle_cls, lang=paddle_lang, **kwargs)


def _paddle_ocr(ocr, image) -> list:
    """Run PaddleOCR on ``image`` and flatten its output.

    Only the first element of the value returned by ``ocr.ocr`` is used
    (NOTE(review): presumably one element per input image - confirm).

    Returns:
        A list of ``(bounding box, text, confidence)`` tuples, or an empty
        list when that first element is not a list (e.g. nothing detected).
    """
    first_page = ocr.ocr(image, cls=False)[0]
    if isinstance(first_page, list):
        detections = []
        for box, recognised in first_page:
            # `recognised` holds the text followed by its confidence.
            detections.append((box, recognised[0], recognised[1]))
        return detections
    return []

# EasyOCR has support for many languages

def _easy_init(easy_languages: list, use_GPU=True, **kwargs):
    """Create an ``easyocr.Reader`` for the given languages.

    Each entry of ``easy_languages`` is passed through ``standardize_lang``
    and its ``'easyocr_lang'`` value is what the reader receives.

    Args:
        easy_languages: Language codes accepted by ``standardize_lang``.
        use_GPU: Passed to the reader as ``gpu``.
        **kwargs: Any further ``easyocr.Reader`` options.
    """
    reader_langs = [standardize_lang(code)['easyocr_lang'] for code in easy_languages]
    return easyocr.Reader(reader_langs, gpu=use_GPU, **kwargs)

def _easy_ocr(ocr,image) -> list:
    """Return whatever ``ocr.readtext`` produces for ``image``, unmodified."""
    detections = ocr.readtext(image)
    return detections

# RapidOCR is mostly for Mandarin and some other Asian languages

def _rapid_init(use_GPU=True, **kwargs):
    """Create a ``RapidOCR`` engine.

    Args:
        use_GPU: Passed to ``RapidOCR`` as ``use_gpu``.
            NOTE(review): verify that the installed rapidocr_onnxruntime
            version actually honours a ``use_gpu`` option.
        **kwargs: Any further ``RapidOCR`` options.
    """
    engine = RapidOCR(use_gpu=use_GPU, **kwargs)
    return engine

def _rapid_ocr(ocr, image) -> list:
    """Run RapidOCR on ``image`` and return only its detections.

    Calling a rapidocr_onnxruntime engine yields ``(result, elapse)``, where
    ``result`` is the list of detections or ``None`` when nothing was found.
    Returning that tuple directly made downstream code treat the timing info
    as a detection, so it is unpacked here and ``None`` becomes ``[]``.

    Returns:
        A list of detections (empty when nothing was recognised).
    """
    output = ocr(image)
    # Tolerate an engine that already returns the bare detection list.
    detections = output[0] if isinstance(output, tuple) else output
    if detections is None:
        return []
    return list(detections)

### Initialize the OCR model
def init_OCR(model='paddle', easy_languages: Optional[list] = None, paddle_lang: Optional[str] = 'ch', use_GPU=True, **kwargs):
    """Create the OCR engine selected by ``model``.

    Args:
        model: ``'paddle'``, ``'easy'`` or ``'rapid'``.
        easy_languages: Languages for the EasyOCR reader; defaults to
            ``['ch_sim', 'en']`` when omitted or ``None``. Only used when
            ``model == 'easy'``.
        paddle_lang: Language code for PaddleOCR. Only used when
            ``model == 'paddle'``.
        use_GPU: Forwarded to the selected engine's initialiser.
        **kwargs: Extra options forwarded to the selected engine's
            initialiser (they used to be accepted but silently dropped).

    Returns:
        The initialised engine object.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    if model == 'paddle':
        return _paddle_init(paddle_lang=paddle_lang, use_GPU=use_GPU, **kwargs)
    if model == 'easy':
        # Build the default per call rather than sharing one mutable list
        # object across all calls through the signature.
        if easy_languages is None:
            easy_languages = ['ch_sim', 'en']
        return _easy_init(easy_languages=easy_languages, use_GPU=use_GPU, **kwargs)
    if model == 'rapid':
        return _rapid_init(use_GPU=use_GPU, **kwargs)
    # Fail here instead of returning None and failing later in _identify.
    raise ValueError(f"Unknown OCR model {model!r}; expected 'paddle', 'easy' or 'rapid'.")

### Perform OCR on the image
def _identify(ocr, image) -> list:
    """Run OCR on ``image`` with the helper that matches the engine's type.

    Args:
        ocr: A ``PaddleOCR``, ``easyocr.Reader`` or ``RapidOCR`` instance.
        image: The image handed to the engine as-is.

    Returns:
        Whatever the matching ``_*_ocr`` helper returns.

    Raises:
        ValueError: If ``ocr`` is not an instance of any of the three classes.
    """
    # Checked in order; the first matching engine class wins.
    runners = (
        (PaddleOCR, _paddle_ocr),
        (easyocr.Reader, _easy_ocr),
        (RapidOCR, _rapid_ocr),
    )
    for engine_cls, run in runners:
        if isinstance(ocr, engine_cls):
            return run(ocr, image)
    raise ValueError("Invalid OCR model. Please initialise the OCR model first with init() and pass it as an argument to _identify().")


### Filter out the results that are not in the source language. Slower, but covers a wider range of languages.
# NOTE: currently not working, and not very reliable anyway, so don't worry about it.
def _id_filtered(ocr, image, lang) -> list:
    """Keep only the OCR entries whose text ``langid`` classifies as ``lang``.

    ``lang`` is first mapped through ``standardize_lang`` (its
    ``'id_model_lang'`` value is compared against langid's prediction), then
    the image is OCR'd and every entry's text (index 1) is classified.

    Returns:
        The entries whose predicted language matches, in their original order.
    """
    target = standardize_lang(lang)['id_model_lang']
    detections = _identify(ocr, image)

    def keep_if_target(detection):
        # Return the entry itself on a match so the results can be filtered.
        predicted = langid.classify(detection[1])[0]
        if predicted == target:
            return detection
        return None

    # langid is slow, so the classifications are spread over a thread pool.
    with ThreadPoolExecutor() as pool:
        return [hit for hit in pool.map(keep_if_target, detections) if hit]


# ch_sim, ch_tra, ja, ko, en
def _id_lang(ocr, image, lang) -> list:
    """Keep only the OCR entries whose text contains characters of ``lang``.

    The image is OCR'd first, then ``lang`` is mapped through
    ``standardize_lang`` and each entry's text (index 1) is tested with
    ``contains_lang``.

    Returns:
        The entries for which ``contains_lang`` is truthy.

    Raises:
        ValueError: If filtering fails, which is reported as the language not
            being one of ``default_languages``. The original error is chained
            as the cause.
    """
    result = _identify(ocr, image)
    lang = standardize_lang(lang)['id_model_lang']
    try:
        return [entry for entry in result if contains_lang(entry[1], lang)]
    except Exception as exc:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are no longer converted into a ValueError.
        message = f"Selected language not part of default: {default_languages}."
        logger.error(message)
        raise ValueError(message) from exc

def id_keep_source_lang(ocr, image, lang) -> list:
    """OCR ``image`` and keep only the entries in the source language.

    ``_id_lang`` is tried first. If it raises ``ValueError``, the
    langid-based ``_id_filtered`` is used instead. If that also fails, the
    error is printed and the process exits with status 1.
    """
    try:
        return _id_lang(ocr, image, lang)
    except ValueError:
        # Fall through to the slower langid-based filter below.
        pass

    try:
        return _id_filtered(ocr, image, lang)
    except Exception as e:
        print(f'Probably an issue with the _id_filtered function. {e}')
        sys.exit(1)


def get_words(ocr_output) -> list:
    """Return the text (index 1) of every entry in ``ocr_output``."""
    words = []
    for detection in ocr_output:
        words.append(detection[1])
    return words

def get_positions(ocr_output) -> list:
    """Return the position (index 0) of every entry in ``ocr_output``."""
    return list(map(lambda detection: detection[0], ocr_output))

def get_confidences(ocr_output) -> list:
    """Return the confidence (index 2) of every entry in ``ocr_output``."""
    confidences = []
    for detection in ocr_output:
        confidences.append(detection[2])
    return confidences


if __name__ == '__main__':
    # Manual smoke test: OCR one screenshot with PaddleOCR and print the
    # entries kept for simplified Chinese.
    # An image path may be given as the first command-line argument;
    # otherwise the original hard-coded screenshot is used.
    image_path = sys.argv[1] if len(sys.argv) > 1 else '/home/James/Pictures/Screenshots/DP-1.jpg'
    # PaddleOCR's language code for Chinese is 'ch'; 'zh' is not a code it accepts.
    engine = init_OCR(model='paddle', paddle_lang='ch', easy_languages=['en', 'ch_sim'])
    print(id_keep_source_lang(engine, image_path, 'ch_sim'))