diff --git a/TTS/TikTok.py b/TTS/TikTok.py
index 9c155ea..64372bd 100644
--- a/TTS/TikTok.py
+++ b/TTS/TikTok.py
@@ -1,26 +1,28 @@
+# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki
 import base64
 import random
+import time
+from typing import Optional, Final
 
 import requests
-from requests.adapters import HTTPAdapter, Retry
 
 from utils import settings
 
-# from profanity_filter import ProfanityFilter
-# pf = ProfanityFilter()
-# Code by @JasonLovesDoggo
-# https://twitter.com/scanlime/status/1512598559769702406
+__all__ = ["TikTok", "TikTokTTSException"]
 
-nonhuman = [  # DISNEY VOICES
+disney_voices: Final = (
     "en_us_ghostface",  # Ghost Face
     "en_us_chewbacca",  # Chewbacca
     "en_us_c3po",  # C3PO
     "en_us_stitch",  # Stitch
     "en_us_stormtrooper",  # Stormtrooper
     "en_us_rocket",  # Rocket
-    # ENGLISH VOICES
-]
-human = [
+    "en_female_madam_leota",  # Madame Leota
+    "en_male_ghosthost",  # Ghost Host
+    "en_male_pirate",  # pirate
+)
+
+eng_voices: Final = (
     "en_au_001",  # English AU - Female
     "en_au_002",  # English AU - Male
     "en_uk_001",  # English UK - Male 1
@@ -30,23 +32,28 @@ human = [
     "en_us_006",  # English US - Male 1
     "en_us_007",  # English US - Male 2
     "en_us_009",  # English US - Male 3
-    "en_us_010",
-]
-voices = nonhuman + human
+    "en_us_010",  # English US - Male 4
+    "en_male_narration",  # Narrator
+    "en_male_funny",  # Funny
+    "en_female_emotional",  # Peaceful
+    "en_male_cody",  # Serious
+)
 
-noneng = [
+non_eng_voices: Final = (
+    # Western European voices
     "fr_001",  # French - Male 1
     "fr_002",  # French - Male 2
     "de_001",  # German - Female
     "de_002",  # German - Male
     "es_002",  # Spanish - Male
-    # AMERICA VOICES
+    "it_male_m18",  # Italian - Male
+    # South american voices
     "es_mx_002",  # Spanish MX - Male
     "br_001",  # Portuguese BR - Female 1
     "br_003",  # Portuguese BR - Female 2
     "br_004",  # Portuguese BR - Female 3
     "br_005",  # Portuguese BR - Male
-    # ASIA VOICES
+    # asian voices
     "id_001",  # Indonesian - Female
     "jp_001",  # Japanese - Female 1
     "jp_003",  # Japanese - Female 2
@@ -55,52 +62,110 @@ noneng = [
     "kr_002",  # Korean - Male 1
     "kr_003",  # Korean - Female
     "kr_004",  # Korean - Male 2
-]
+)
 
+vocals: Final = (
+    "en_female_f08_salut_damour",  # Alto
+    "en_male_m03_lobby",  # Tenor
+    "en_male_m03_sunshine_soon",  # Sunshine Soon
+    "en_female_f08_warmy_breeze",  # Warmy Breeze
+    "en_female_ht_f08_glorious",  # Glorious
+    "en_male_sing_funny_it_goes_up",  # It Goes Up
+    "en_male_m2_xhxs_m03_silly",  # Chipmunk
+    "en_female_ht_f08_wonderful_world",  # Dramatic
+)
 
-# good_voices = {'good': ['en_us_002', 'en_us_006'],
-#               'ok': ['en_au_002', 'en_uk_001']}  # less en_us_stormtrooper more less en_us_rocket en_us_ghostface
 
+class TikTok:
+    """TikTok Text-to-Speech Wrapper"""
+    max_chars: Final = 300
+    BASE_URL: Final = "https://api16-normal-c-useast1a.tiktokv.com/media/api/text/speech/invoke/"
 
-class TikTok:  # TikTok Text-to-Speech Wrapper
     def __init__(self):
-        self.URI_BASE = "https://api22-normal-c-useast1a.tiktokv.com/media/api/text/speech/invoke/?text_speaker="
-        self.max_chars = 200
-        self.voices = {"human": human, "nonhuman": nonhuman, "noneng": noneng}
-        self.headers = {
-            "User-Agent": "com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; Build/NRD90M;tt-ok/3.12.13.1)",
-            "Cookie": "sessionid="
-            + settings.config["settings"]["tts"]["tiktok_sessionid"],
+        headers = {
+            "User-Agent": "com.zhiliaoapp.musically/2022600030 (Linux; U; Android 7.1.2; es_ES; SM-G988N; "
+                          "Build/NRD90M;tt-ok/3.12.13.1)",
+            "Cookie": f"sessionid={settings.config['settings']['tts']['tiktok_sessionid']}"
         }
 
-    def run(self, text, filepath, random_voice: bool = False):
-        # if censor:
-        #     req_text = pf.censor(req_text)
-        #     pass
-        voice = (
-            self.randomvoice()
-            if random_voice
-            else (settings.config["settings"]["tts"]["tiktok_voice"] or random.choice(self.voices["human"]))
-        )
-        try:
-            url = f"{self.URI_BASE}{voice}&req_text={text}&speaker_map_type=0&aid=1233"
-            r = requests.post(url, headers=self.headers)
-        except requests.exceptions.SSLError:
-            # https://stackoverflow.com/a/47475019/18516611
-            session = requests.Session()
-            retry = Retry(connect=3, backoff_factor=0.5)
-            adapter = HTTPAdapter(max_retries=retry)
-            session.mount("http://", adapter)
-            session.mount("https://", adapter)
-            r = session.post(
-                f"{self.URI_BASE}{voice}&req_text={text}&speaker_map_type=0"
-            )
-        # print(r.text)
-        vstr = [r.json()["data"]["v_str"]][0]
-        b64d = base64.b64decode(vstr)
+        self._session = requests.Session()
+        self._session.headers = headers
+
+    def run(self, text: str, filepath: str, random_voice: bool = False):
+        """Request speech for ``text`` and write the decoded audio bytes to ``filepath``.
+
+        Raises TikTokTTSException when the API answers with a non-zero status code.
+        """
+        if random_voice:
+            voice = self.random_voice()
+        else:
+            # if tiktok_voice is not set (or is empty) in the config file, the API will automatically choose one
+            voice = settings.config["settings"]["tts"].get("tiktok_voice") or None
+
+        # get the audio from the TikTok API
+        data = self.get_voices(voice=voice, text=text)
+
+        # check if there was an error in the request
+        status_code = data["status_code"]
+        if status_code != 0:
+            raise TikTokTTSException(status_code, data["message"])
 
+        # decode data from base64 to binary
+        raw_voices = data["data"]["v_str"]
+        decoded_voices = base64.b64decode(raw_voices)
+
+        # write voices to specified filepath
         with open(filepath, "wb") as out:
-            out.write(b64d)
+            out.write(decoded_voices)
+
+    def get_voices(self, text: str, voice: Optional[str] = None) -> dict:
+        """Send ``text`` to the TikTok API and return its decoded JSON response.
+
+        If voice is not passed, the API will try to use the most fitting voice.
+        """
+        # sanitize text; spaces are left untouched because requests url-encodes
+        # the query parameters itself (a manual "+" would arrive as a literal plus)
+        text = text.replace("+", "plus").replace("&", "and").replace("r/", "")
+
+        # prepare url request
+        params = {
+            "req_text": text,
+            "speaker_map_type": 0,
+            "aid": 1233
+        }
+
+        if voice is not None:
+            params["text_speaker"] = voice
+
+        url = self.BASE_URL
+
+        # send request, retrying once after a short random pause on a connection error
+        try:
+            response = self._session.post(url, params=params)
+        except requests.exceptions.ConnectionError:
+            time.sleep(random.randrange(1, 7))
+            response = self._session.post(url, params=params)
+
+        return response.json()
+
+    @staticmethod
+    def random_voice():
+        """Return a randomly chosen entry of ``eng_voices``."""
+        return random.choice(eng_voices)
+
+
+class TikTokTTSException(Exception):
+    """Error raised when the TikTok API response carries a non-zero status code."""
+    def __init__(self, code: int, message: str):
+        self._code = code
+        self._message = message
 
-    def randomvoice(self):
-        return random.choice(self.voices["human"])
+    def __str__(self):
+        if self._code == 1:
+            return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}"
+        elif self._code == 2:
+            return f"Code: {self._code}, reason: the text is too long, message: {self._message}"
+        elif self._code == 4:
+            return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}"
+        else:
+            return f"Code: {self._code}, reason: unknown, message: {self._message}"
diff --git a/utils/.config.template.toml b/utils/.config.template.toml
index 0130227..aea89c7 100644
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@@ -34,7 +34,7 @@ background_choice = { optional = true, default = "minecraft", example = "rocket-
 voice_choice = { optional = false, default = "tiktok", options = ["streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. This can be left blank and you will be prompted to choose at runtime." }
 aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
 streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
-tiktok_voice = { optional = false, default = "en_us_006", example = "en_us_006", explanation = "The voice used for TikTok TTS" }
+tiktok_voice = { optional = true, example = "en_us_006", explanation = "The voice used for TikTok TTS" }
 tiktok_sessionid = { optional = true, example = "c76bcc3a7625abcc27b508c7db457ff1", explanation = "TikTok sessionid needed for the TTS API request. Check documentation if you don't know how to obtain it." }
 python_voice = { optional = false, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" }
 py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" }