diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 29542e2..cdba804 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -1,80 +1,79 @@ -# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki -import base64 import random import time from typing import Optional, Final - -import requests +import requests, base64, re, sys +from threading import Thread +from playsound import playsound from utils import settings -__all__ = ["TikTok", "TikTokTTSException"] - -disney_voices: Final[tuple] = ( - "en_us_ghostface", # Ghost Face - "en_us_chewbacca", # Chewbacca - "en_us_c3po", # C3PO - "en_us_stitch", # Stitch - "en_us_stormtrooper", # Stormtrooper - "en_us_rocket", # Rocket - "en_female_madam_leota", # Madame Leota - "en_male_ghosthost", # Ghost Host - "en_male_pirate", # pirate -) - -eng_voices: Final[tuple] = ( - "en_au_001", # English AU - Female - "en_au_002", # English AU - Male - "en_uk_001", # English UK - Male 1 - "en_uk_003", # English UK - Male 2 - "en_us_001", # English US - Female (Int. 1) - "en_us_002", # English US - Female (Int. 2) - "en_us_006", # English US - Male 1 - "en_us_007", # English US - Male 2 - "en_us_009", # English US - Male 3 - "en_us_010", # English US - Male 4 - "en_male_narration", # Narrator - "en_male_funny", # Funny - "en_female_emotional", # Peaceful - "en_male_cody", # Serious -) - -non_eng_voices: Final[tuple] = ( - # Western European voices - "fr_001", # French - Male 1 - "fr_002", # French - Male 2 - "de_001", # German - Female - "de_002", # German - Male - "es_002", # Spanish - Male - "it_male_m18", # Italian - Male - # South american voices - "es_mx_002", # Spanish MX - Male - "br_001", # Portuguese BR - Female 1 - "br_003", # Portuguese BR - Female 2 - "br_004", # Portuguese BR - Female 3 - "br_005", # Portuguese BR - Male - # asian voices - "id_001", # Indonesian - Female - "jp_001", # Japanese - Female 1 - "jp_003", # Japanese - Female 2 - "jp_005", # Japanese - Female 3 - "jp_006", # Japanese - Male - "kr_002", # Korean - Male 1 - "kr_003", # Korean - Female - "kr_004", # Korean - Male 2 -) - -vocals: Final[tuple] = ( - "en_female_f08_salut_damour", # Alto - "en_male_m03_lobby", # Tenor - "en_male_m03_sunshine_soon", # Sunshine Soon - "en_female_f08_warmy_breeze", # Warmy Breeze - "en_female_ht_f08_glorious", # Glorious - "en_male_sing_funny_it_goes_up", # It Goes Up - "en_male_m2_xhxs_m03_silly", # Chipmunk - "en_female_ht_f08_wonderful_world", # Dramatic -) - +# define the endpoint data with URLs and corresponding response keys +ENDPOINT_DATA = [ + { + "url": "https://tiktok-tts.weilnet.workers.dev/api/generation", + "response": "data" + }, + { + "url": "https://countik.com/api/text/speech", + "response": "v_data" + }, + { + "url": "https://gesserit.co/api/tiktok-tts", + "response": "base64" + } +] + +# define available voices for text-to-speech conversion +VOICES = [ + # DISNEY VOICES + 'en_us_ghostface', # Ghost Face + 'en_us_chewbacca', # Chewbacca + 'en_us_c3po', # C3PO + 'en_us_stitch', # Stitch + 'en_us_stormtrooper', # Stormtrooper + 'en_us_rocket', # Rocket + # ENGLISH VOICES + 'en_au_001', # English AU - Female + 'en_au_002', # English AU - Male + 'en_uk_001', # English UK - Male 1 + 'en_uk_003', # English UK - Male 2 + 'en_us_001', # English US - Female (Int. 1) + 'en_us_002', # English US - Female (Int. 2) + 'en_us_006', # English US - Male 1 + 'en_us_007', # English US - Male 2 + 'en_us_009', # English US - Male 3 + 'en_us_010', # English US - Male 4 + # EUROPE VOICES + 'fr_001', # French - Male 1 + 'fr_002', # French - Male 2 + 'de_001', # German - Female + 'de_002', # German - Male + 'es_002', # Spanish - Male + # AMERICA VOICES + 'es_mx_002', # Spanish MX - Male + 'br_001', # Portuguese BR - Female 1 + 'br_003', # Portuguese BR - Female 2 + 'br_004', # Portuguese BR - Female 3 + 'br_005', # Portuguese BR - Male + # ASIA VOICES + 'id_001', # Indonesian - Female + 'jp_001', # Japanese - Female 1 + 'jp_003', # Japanese - Female 2 + 'jp_005', # Japanese - Female 3 + 'jp_006', # Japanese - Male + 'kr_002', # Korean - Male 1 + 'kr_003', # Korean - Female + 'kr_004', # Korean - Male 2 + # SINGING VOICES + 'en_female_f08_salut_damour', # Alto + 'en_male_m03_lobby', # Tenor + 'en_female_f08_warmy_breeze', # Warmy Breeze + 'en_male_m03_sunshine_soon', # Sunshine Soon + # OTHER + 'en_male_narration', # narrator + 'en_male_funny', # wacky + 'en_female_emotional', # peaceful +] class TikTok: """TikTok Text-to-Speech Wrapper""" @@ -90,76 +89,86 @@ class TikTok: self.max_chars = 200 self._session = requests.Session() - # set the headers to the session, so we don't have to do it for every request self._session.headers = headers - def run(self, text: str, filepath: str, random_voice: bool = False): + def run(self, text: str, filepath: str, random_voice: bool = False, play_sound: bool = False): if random_voice: voice = self.random_voice() else: - # if tiktok_voice is not set in the config file, then use a random voice voice = settings.config["settings"]["tts"].get("tiktok_voice", None) - # get the audio from the TikTok API - data = self.get_voices(voice=voice, text=text) + chunks = self._split_text(text) - # check if there was an error in the request - status_code = data["status_code"] - if status_code != 0: - raise TikTokTTSException(status_code, data["message"]) + for entry in ENDPOINT_DATA: + endpoint_valid = True + audio_data = ["" for _ in range(len(chunks))] - # decode data from base64 to binary - try: - raw_voices = data["data"]["v_str"] - except: - print( - "The TikTok TTS returned an invalid response. Please try again later, and report this bug." - ) - raise TikTokTTSException(0, "Invalid response") - decoded_voices = base64.b64decode(raw_voices) + def generate_audio_chunk(index: int, chunk: str) -> None: + nonlocal endpoint_valid - # write voices to specified filepath - with open(filepath, "wb") as out: - out.write(decoded_voices) + if not endpoint_valid: + return - def get_voices(self, text: str, voice: Optional[str] = None) -> dict: - """If voice is not passed, the API will try to use the most fitting voice""" - # sanitize text - text = text.replace("+", "plus").replace("&", "and").replace("r/", "") + try: + response = requests.post( + entry["url"], + json={ + "text": chunk, + "voice": voice + } + ) - # prepare url request - params = {"req_text": text, "speaker_map_type": 0, "aid": 1233} + if response.status_code == 200: + audio_data[index] = response.json()[entry["response"]] + else: + endpoint_valid = False - if voice is not None: - params["text_speaker"] = voice + except requests.RequestException as e: + print(f"Error: {e}") + sys.exit() - # send request - try: - response = self._session.post(self.URI_BASE, params=params) - except ConnectionError: - time.sleep(random.randrange(1, 7)) - response = self._session.post(self.URI_BASE, params=params) + threads = [] + for index, chunk in enumerate(chunks): + thread = Thread(target=generate_audio_chunk, args=(index, chunk)) + threads.append(thread) + thread.start() - return response.json() + for thread in threads: + thread.join() - @staticmethod - def random_voice() -> str: - return random.choice(eng_voices) + if not endpoint_valid: + continue + + audio_bytes = base64.b64decode("".join(audio_data)) + with open(filepath, "wb") as file: + file.write(audio_bytes) + print(f"File '{filepath}' has been generated successfully.") -class TikTokTTSException(Exception): - def __init__(self, code: int, message: str): - self._code = code - self._message = message + if play_sound: + playsound(filepath) - def __str__(self) -> str: - if self._code == 1: - return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}" + break - if self._code == 2: - return f"Code: {self._code}, reason: the text is too long, message: {self._message}" + def _split_text(self, text: str) -> list[str]: + merged_chunks: list[str] = [] + seperated_chunks: list[str] = re.findall(r'.*?[.,!?:;-]|.+', text) - if self._code == 4: - return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}" + for i, chunk in enumerate(seperated_chunks): + if len(chunk) > 300: + seperated_chunks[i:i+1] = re.findall(r'.*?[ ]|.+', chunk) - return f"Code: {self._message}, reason: unknown, message: {self._message}" + merged_chunk = "" + for seperated_chunk in seperated_chunks: + if len(merged_chunk) + len(seperated_chunk) <= 300: + merged_chunk += seperated_chunk + else: + merged_chunks.append(merged_chunk) + merged_chunk = seperated_chunk + + merged_chunks.append(merged_chunk) + return merged_chunks + + @staticmethod + def random_voice() -> str: + return random.choice(VOICES) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index c9abc85..9320a9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,4 +20,5 @@ torch==2.0.1 transformers==4.29.2 ffmpeg-python==0.2.0 elevenlabs==0.2.17 -yt-dlp==2023.7.6 \ No newline at end of file +yt-dlp==2023.7.6 +playsound==1.2.2 \ No newline at end of file