# onscreen-translator/config.py
import ast
import os
import platform
import tempfile

import torch
from dotenv import load_dotenv
# Load variables from a local .env file (if present). override=True means the
# .env values take precedence over variables already set in the process
# environment.
load_dotenv(override=True)
# Fallback directory for temporary screenshot images (used by TEMP_IMG_DIR
# below when TEMP_IMG_PATH is not set). tempfile.gettempdir() resolves the
# real per-user temp directory on every OS (honouring TMPDIR/TEMP/TMP),
# replacing hard-coded paths that were wrong on Windows (the previous
# "C:\Users\AppData\Local\Temp" had no username segment) and that left
# default_tmp_dir undefined on platforms other than Windows/Linux/Darwin.
default_tmp_dir = tempfile.gettempdir()
###################################################################################################
### EDIT THESE VARIABLES ###
# Create a .env file in the same directory as this file and set the variables there. You can edit this file directly instead, but a later pull from the repository may overwrite your changes unless they are committed or stashed.
# The default values should be fine for most cases. Only ones that you need to change are the API keys, and the variables under Translation and API Translation if you choose to use an external API.
# available languages: 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'
# Interval in seconds between translation passes. If your system is slow, a
# lower value is probably fine with regard to API rate limits.
INTERVAL = float(os.getenv('INTERVAL', 1.5))
### OCR
# Higher values mean more sensitivity to changes in the screen; too high and
# the screen will constantly refresh.
IMAGE_CHANGE_THRESHOLD = float(os.getenv('IMAGE_CHANGE_THRESHOLD', 0.75))
# 'easy', 'paddle', 'rapid'. easy is the most accurate, paddle is the fastest
# with CUDA, and rapid is the fastest on CPU. rapid only supports
# Chinese <-> English unless more languages are added.
OCR_MODEL = os.getenv('OCR_MODEL', 'easy')
# 'True' or 'False': use CUDA for OCR. Defaults to CPU if no CUDA GPU is
# available. Parsed with ast.literal_eval, so the env value must be a Python
# literal ('True'/'False'), not 'yes'/'no'.
OCR_USE_GPU = ast.literal_eval(os.getenv('OCR_USE_GPU', 'True'))
### Drawing/Overlay Config
FILL_COLOUR = os.getenv('FILL_COLOUR', 'white') # Colour of the textboxes.
FONT_COLOUR = os.getenv('FONT_COLOUR', "#ff0000") # Colour of the font.
# Path to the font file. Ensure it is a Unicode .ttf file if you want to be
# able to see most languages.
# Fix: the default must be joined onto the module's directory, not onto
# __file__ itself — os.path.join(__file__, ...) produced the nonexistent
# path ".../config.py/fonts/..." (compare API_MODELS_FILEPATH below).
FONT_FILE = os.getenv('FONT_FILE', os.path.join(os.path.dirname(__file__), "fonts", "Arial-Unicode-Bold.ttf"))
FONT_SIZE_MAX = int(os.getenv('FONT_SIZE_MAX', 20)) # Maximum font size you want to be able to see onscreen.
FONT_SIZE_MIN = int(os.getenv('FONT_SIZE_MIN', 8)) # Minimum font size you want to be able to see onscreen.
LINE_SPACING = int(os.getenv('LINE_SPACING', 3)) # Spacing between lines of text in the learn modes of DRAW_TRANSLATIONS_MODE.
REGION = ast.literal_eval(os.getenv('REGION', '(0,0,2560,1440)')) # (x1, y1, x2, y2) - the region of the screen to capture.
DRAW_TRANSLATIONS_MODE = os.getenv('DRAW_TRANSLATIONS_MODE', 'learn_cover')
"""
DRAW_TRANSLATIONS_MODE possible options:
'learn': adds translated text, original text (kept so that when texts get moved around, the translation it references is still understood) and (optionally, with the TO_ROMANIZE option) romanized text above the original text. Texts can overlap if squished into a corner. Works well for games where text is sparser.
'learn_cover': same as above but covers the original text with the translated text. Can help with readability and is less cluttered, but with sufficiently dense text the texts can still overlap.
'translation_only_cover': covers the original text with the translated text - will not show the original text at all, but is also unaffected by overlapping texts.
"""
### Translation
# Maximum number of phrases sent to the translation model per pass.
# Fix: the variable is named MAX_TRANSLATE but was only read from the
# mismatched env name 'MAX_TRANSLATION'. Read the matching name first and
# fall back to the legacy one so existing .env files keep working.
MAX_TRANSLATE = int(os.getenv('MAX_TRANSLATE', os.getenv('MAX_TRANSLATION', 200)))
SOURCE_LANG = os.getenv('SOURCE_LANG', 'ch_sim') # Translate from 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'.
TARGET_LANG = os.getenv('TARGET_LANG', 'en') # Translate to 'ch_sim', 'ch_tra', 'ja', 'ko', 'en'.
# Romanize the text or not. Only used by the learn modes of
# DRAW_TRANSLATIONS_MODE; the romanization is added above the original text.
TO_ROMANIZE = ast.literal_eval(os.getenv('TO_ROMANIZE', 'True'))
### API Translation (could be external or a local API)
# API KEYS — read from the environment/.env; each is None when unset
# (os.getenv default).
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY') # https://ai.google.dev/
GROQ_API_KEY = os.getenv('GROQ_API_KEY') # https://console.groq.com/keys
# MISTRAL_API_KEY = os.getenv('MISTRAL_API_KEY') # https://console.mistral.ai/api-keys/ (disabled: too slow)
### Local Translation Models
# 'opus' or 'm2m' — opus is a lot more lightweight.
# Fix: previously read via os.environ['TRANSLATION_MODEL'], which raised
# KeyError when the variable was unset — inconsistent with every other
# setting here and with the "defaults should be fine" note above. Default
# to the lightweight 'opus'.
TRANSLATION_MODEL = os.getenv('TRANSLATION_MODEL', 'opus')
# 'True' or 'False': use CUDA for the local translation model.
TRANSLATION_USE_GPU = ast.literal_eval(os.getenv('TRANSLATION_USE_GPU', 'True'))
MAX_INPUT_TOKENS = int(os.getenv('MAX_INPUT_TOKENS', 512)) # Input length limit per translation request.
MAX_OUTPUT_TOKENS = int(os.getenv('MAX_OUTPUT_TOKENS', 512)) # Generation length cap.
BATCH_SIZE = int(os.getenv('BATCH_SIZE', 6)) # Phrases per translation batch.
# Will not attempt pinging Hugging Face for the models; uses only the cached
# local models.
LOCAL_FILES_ONLY = ast.literal_eval(os.getenv('LOCAL_FILES_ONLY', 'False'))
###################################################################################################
###################################################################################################
### DO NOT EDIT THESE VARIABLES ###
## Filepaths
# api_models.json sibling file, resolved relative to this module's directory.
API_MODELS_FILEPATH = os.path.join(os.path.dirname(__file__), 'api_models.json')
# Nominal font size: midpoint of the configured min/max range.
FONT_SIZE = int((FONT_SIZE_MAX + FONT_SIZE_MIN)/2)
# One rendered line of text is treated as one nominal font size tall.
LINE_HEIGHT = FONT_SIZE
# Device selection for the local translation model.
# Fix: the previous check was `TRANSLATION_USE_GPU is False`, which only
# honoured the exact literal False — a falsy env value such as
# TRANSLATION_USE_GPU=0 would still try to use CUDA. Treat any falsy value
# as "CPU only"; otherwise use CUDA when available, CPU as fallback.
if TRANSLATION_USE_GPU and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
# Directory where the temporary screenshot images are stored, and the fixed
# image filename (a constant name so each capture overwrites the previous one).
TEMP_IMG_DIR = os.getenv('TEMP_IMG_PATH', default_tmp_dir)
TEMP_IMG_PATH = os.path.join(TEMP_IMG_DIR, 'tempP_img91258102.png')
### Just for info
# There are limitations on which languages the OCR models can handle, so this
# is the supported set end-to-end.
available_langs = ['ch_sim', 'ch_tra', 'ja', 'ko', 'en']
# Model families currently wired up: local seq2seq models, hosted API models,
# and causal LLMs (none yet).
seq_llm_models = ['opus', 'm2m']
api_llm_models = ['gemini']
causal_llm_models = []
curr_models = [*seq_llm_models, *api_llm_models, *causal_llm_models]