diff --git a/requirements.txt b/requirements.txt index 14cace5..d7c5e8c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,6 @@ pyttsx3==2.90 Pillow~=9.3.0 tomlkit==0.11.4 Flask==2.2.2 -spacy==3.4.1 +clean-text==0.6.0 +unidecode==1.3.2 +spacy==3.4.1 \ No newline at end of file diff --git a/utils/.config.template.toml b/utils/.config.template.toml index 6a7b024..4fe15fe 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -43,3 +43,4 @@ tiktok_voice = { optional = true, default = "en_us_006", example = "en_us_006", python_voice = { optional = true, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" } py_voice_num = { optional = true, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" } silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" } +no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" } \ No newline at end of file diff --git a/utils/voice.py b/utils/voice.py index 5606023..a88c87d 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -6,6 +6,9 @@ from time import sleep from requests import Response +from utils import settings +from cleantext import clean + if sys.version_info[0] >= 3: from datetime import timezone @@ -86,5 +89,10 @@ def sanitize_text(text: str) -> str: regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%—“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") + + # emoji removal if the setting is enabled + if settings.config["settings"]["tts"]["no_emojis"]: + result = clean(result, no_emoji=True) + # remove extra whitespace return " ".join(result.split())