# onscreen-translator/helpers/utils.py

import re, uroman as ur
from pypinyin import pinyin
import pyscreenshot as ImageGrab # used for Wayland; not sure if it works on other machines, mss is the alternative backend
import mss, io, os
from PIL import Image
import jaconv, MeCab, unidic, pykakasi
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
# for creating furigana
mecab = MeCab.Tagger('-d "{}"'.format(unidic.DICDIR))
uroman = ur.Uroman()
# kakasi romanises Japanese text. It can also convert to hiragana or katakana, but it does not split words, so it is less useful for furigana.
kks = pykakasi.kakasi()
# check whether two 1-D intervals (the projections of two shapes onto one axis) overlap
def intercepts(x, y):
    # x and y are (start, end) tuples describing a line segment along one axis, with start <= end
    x1, x2 = x
    y1, y2 = y
    return (x1 <= y1 <= x2) or (x1 <= y2 <= x2) or (y1 <= x1 <= y2) or (y1 <= x2 <= y2)
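# e.g. intercepts((0, 5), (3, 8)) -> True (the ranges overlap), intercepts((0, 2), (3, 4)) -> False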
def is_wayland():
    return 'WAYLAND_DISPLAY' in os.environ
# capture `region` of the screen with pyscreenshot (Wayland); optionally save it to `path`
def printsc_wayland(region, save: bool = False, path: str = None):
    im = ImageGrab.grab(bbox=region)
    if save:
        im.save(path)
    return im
def printsc_non_wayland(region, save: bool = False, path: str = None):
    # use mss to capture the screen
    with mss.mss() as sct:
        # grab the requested region
        img = sct.grab(region)
        # convert the raw BGRA buffer to a PIL image
        image = Image.frombytes("RGB", img.size, img.bgra, "raw", "BGRX")
        # save the image if requested
        if save:
            image.save(path)
        return image
def printsc(region, save: bool = False, path: str = None):
    try:
        if is_wayland():
            return printsc_wayland(region, save, path)
        else:
            return printsc_non_wayland(region, save, path)
    except Exception as e:
        print(f'Error {e}')
def convert_image_to_bytes(img):
    with io.BytesIO() as byte_stream:
        img.save(byte_stream, format='PNG')  # Save the image to the byte stream
        return byte_stream.getvalue()  # Get the byte representation
def bytes_to_image(image_bytes):
    # Load the image from bytes
    byte_stream = io.BytesIO(image_bytes)
    # Open the image from the BytesIO stream
    image = Image.open(byte_stream)
    return image
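# round trip: bytes_to_image(convert_image_to_bytes(img)) yields an equivalent PIL image (PNG is lossless)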
# for Japanese: append furigana (hiragana readings in parentheses) after words that contain kanji
def add_furigana(text):
    parsed = mecab.parse(text).split('\n')[:-2]  # drop the trailing 'EOS' line and empty string
    furigana_string = ''
    for i in parsed:
        word = i.split('\t')[0]
        try:
            # field 6 of the comma-separated unidic features is the katakana reading; convert it to hiragana
            add = f"({jaconv.kata2hira(i.split(',')[6])})"
        except IndexError:
            add = ''
        to_add = add if contains_kanji(word) else ''
        furigana_string += word + to_add
    return furigana_string
def contains_kanji(text):
    return bool(re.search(r'[\u4E00-\u9FFF]', text))
def contains_hiragana(text):
    return bool(re.search(r'[\u3040-\u309F]', text))
def contains_katakana(text):
    return bool(re.search(r'[\u30A0-\u30FF]', text))
# romanise text: pinyin for Chinese, kakasi (Hepburn) for Japanese, uroman for everything else
def romanize(text, lang):
    if lang in ['zh', 'ch_sim', 'ch_tra']:
        # take the first candidate reading for each character
        return ' '.join([py[0] for py in pinyin(text, heteronym=True)])
    if lang == 'ja':
        return ' '.join(item['hepburn'] for item in kks.convert(text))
    return uroman.romanize_string(text)
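# e.g. romanize('你好', 'zh') returns tone-marked pinyin and romanize('こんにちは', 'ja') a Hepburn reading;
# exact output depends on the installed pypinyin/pykakasi versions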
# check if a string contains characters from a language
def contains_lang(text, lang):
    # Matches any character in the Unicode range of the language
    if lang == 'zh':
        return bool(re.search(r'[\u4e00-\u9fff]', text))
    elif lang == 'ja':
        return bool(re.search(r'[\u3040-\u30ff]', text))
    elif lang == 'ko':
        return bool(re.search(r'[\uac00-\ud7af]', text))
    elif lang == 'en':
        return bool(re.search(r'[a-zA-Z]', text))
    else:
        raise ValueError("Invalid language. Please use one of 'en', 'zh', 'ja', or 'ko'.")
### supported languages: en, ch_sim, ch_tra, ja, ko (RapidOCR only has Chinese and English models at the moment)
def standardize_lang(lang):
    if lang == 'ch_sim':
        easyocr_lang = 'ch_sim'
        paddleocr_lang = 'ch'
        rapidocr_lang = 'ch'
        translation_model_lang = 'zh'
        id_model_lang = 'zh'
    elif lang == 'ch_tra':
        easyocr_lang = 'ch_tra'
        paddleocr_lang = 'ch'
        rapidocr_lang = 'ch'
        translation_model_lang = 'zh'
        id_model_lang = 'zh'
    elif lang == 'ja':
        easyocr_lang = 'ja'
        paddleocr_lang = 'japan'
        rapidocr_lang = 'ja'
        translation_model_lang = 'ja'
        id_model_lang = 'ja'
    elif lang == 'ko':
        easyocr_lang = 'korean'
        paddleocr_lang = 'korean'
        rapidocr_lang = 'ko'
        translation_model_lang = 'ko'
        id_model_lang = 'ko'
    elif lang == 'en':
        easyocr_lang = 'en'
        paddleocr_lang = 'en'
        rapidocr_lang = 'en'
        translation_model_lang = 'en'
        id_model_lang = 'en'
    else:
        raise ValueError(f"Invalid language {lang}. Please use one of 'en', 'ch_sim', 'ch_tra', 'ja', or 'ko'.")
    return {'easyocr_lang': easyocr_lang,
            'paddleocr_lang': paddleocr_lang,
            'rapidocr_lang': rapidocr_lang,
            'translation_model_lang': translation_model_lang,
            'id_model_lang': id_model_lang}
def which_ocr_lang(model):
    if model == 'easy':
        return 'easyocr_lang'
    elif model == 'paddle':
        return 'paddleocr_lang'
    elif model == 'rapid':
        return 'rapidocr_lang'
    else:
        raise ValueError("Invalid OCR model. Please use one of 'easy', 'paddle', or 'rapid'.")
def similar_tfidf(list1, list2, threshold) -> bool:
    """Return True if the TF-IDF cosine similarity between the two lists of texts exceeds threshold."""
    if not list1 or not list2:
        return False
    vectorizer = TfidfVectorizer()
    all_texts = list1 + list2
    tfidf_matrix = vectorizer.fit_transform(all_texts)
    # Average the TF-IDF vectors of each list, then compare the averaged vectors
    vec1 = np.mean(tfidf_matrix[:len(list1)].toarray(), axis=0).reshape(1, -1)
    vec2 = np.mean(tfidf_matrix[len(list1):].toarray(), axis=0).reshape(1, -1)
    return float(cosine_similarity(vec1, vec2)[0, 0]) > threshold
if __name__ == "__main__":
    # Example usage
    japanesetext = "本が好きにちは"
    print(add_furigana(japanesetext))
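    # A few more illustrative checks (assumed sample inputs; romanisation output depends on the installed pypinyin/pykakasi versions)
    print(romanize(japanesetext, 'ja'))
    print(romanize('你好', 'zh'))
    print(standardize_lang('ja')[which_ocr_lang('paddle')])
    print(similar_tfidf(['hello world'], ['hello world again'], 0.5))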