Made it so TikTok TTS can handle text longer than 300 characters

pull/2049/head
Alex 1 year ago
parent 340762e1b6
commit aebf2262c9

@ -1,80 +1,79 @@
# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki
import base64
import random import random
import time import time
from typing import Optional, Final from typing import Optional, Final
import requests, base64, re, sys
import requests from threading import Thread
from playsound import playsound
from utils import settings from utils import settings
__all__ = ["TikTok", "TikTokTTSException"] # define the endpoint data with URLs and corresponding response keys
ENDPOINT_DATA = [
disney_voices: Final[tuple] = ( {
"en_us_ghostface", # Ghost Face "url": "https://tiktok-tts.weilnet.workers.dev/api/generation",
"en_us_chewbacca", # Chewbacca "response": "data"
"en_us_c3po", # C3PO },
"en_us_stitch", # Stitch {
"en_us_stormtrooper", # Stormtrooper "url": "https://countik.com/api/text/speech",
"en_us_rocket", # Rocket "response": "v_data"
"en_female_madam_leota", # Madame Leota },
"en_male_ghosthost", # Ghost Host {
"en_male_pirate", # pirate "url": "https://gesserit.co/api/tiktok-tts",
) "response": "base64"
}
eng_voices: Final[tuple] = ( ]
"en_au_001", # English AU - Female
"en_au_002", # English AU - Male # define available voices for text-to-speech conversion
"en_uk_001", # English UK - Male 1 VOICES = [
"en_uk_003", # English UK - Male 2 # DISNEY VOICES
"en_us_001", # English US - Female (Int. 1) 'en_us_ghostface', # Ghost Face
"en_us_002", # English US - Female (Int. 2) 'en_us_chewbacca', # Chewbacca
"en_us_006", # English US - Male 1 'en_us_c3po', # C3PO
"en_us_007", # English US - Male 2 'en_us_stitch', # Stitch
"en_us_009", # English US - Male 3 'en_us_stormtrooper', # Stormtrooper
"en_us_010", # English US - Male 4 'en_us_rocket', # Rocket
"en_male_narration", # Narrator # ENGLISH VOICES
"en_male_funny", # Funny 'en_au_001', # English AU - Female
"en_female_emotional", # Peaceful 'en_au_002', # English AU - Male
"en_male_cody", # Serious 'en_uk_001', # English UK - Male 1
) 'en_uk_003', # English UK - Male 2
'en_us_001', # English US - Female (Int. 1)
non_eng_voices: Final[tuple] = ( 'en_us_002', # English US - Female (Int. 2)
# Western European voices 'en_us_006', # English US - Male 1
"fr_001", # French - Male 1 'en_us_007', # English US - Male 2
"fr_002", # French - Male 2 'en_us_009', # English US - Male 3
"de_001", # German - Female 'en_us_010', # English US - Male 4
"de_002", # German - Male # EUROPE VOICES
"es_002", # Spanish - Male 'fr_001', # French - Male 1
"it_male_m18", # Italian - Male 'fr_002', # French - Male 2
# South american voices 'de_001', # German - Female
"es_mx_002", # Spanish MX - Male 'de_002', # German - Male
"br_001", # Portuguese BR - Female 1 'es_002', # Spanish - Male
"br_003", # Portuguese BR - Female 2 # AMERICA VOICES
"br_004", # Portuguese BR - Female 3 'es_mx_002', # Spanish MX - Male
"br_005", # Portuguese BR - Male 'br_001', # Portuguese BR - Female 1
# asian voices 'br_003', # Portuguese BR - Female 2
"id_001", # Indonesian - Female 'br_004', # Portuguese BR - Female 3
"jp_001", # Japanese - Female 1 'br_005', # Portuguese BR - Male
"jp_003", # Japanese - Female 2 # ASIA VOICES
"jp_005", # Japanese - Female 3 'id_001', # Indonesian - Female
"jp_006", # Japanese - Male 'jp_001', # Japanese - Female 1
"kr_002", # Korean - Male 1 'jp_003', # Japanese - Female 2
"kr_003", # Korean - Female 'jp_005', # Japanese - Female 3
"kr_004", # Korean - Male 2 'jp_006', # Japanese - Male
) 'kr_002', # Korean - Male 1
'kr_003', # Korean - Female
vocals: Final[tuple] = ( 'kr_004', # Korean - Male 2
"en_female_f08_salut_damour", # Alto # SINGING VOICES
"en_male_m03_lobby", # Tenor 'en_female_f08_salut_damour', # Alto
"en_male_m03_sunshine_soon", # Sunshine Soon 'en_male_m03_lobby', # Tenor
"en_female_f08_warmy_breeze", # Warmy Breeze 'en_female_f08_warmy_breeze', # Warmy Breeze
"en_female_ht_f08_glorious", # Glorious 'en_male_m03_sunshine_soon', # Sunshine Soon
"en_male_sing_funny_it_goes_up", # It Goes Up # OTHER
"en_male_m2_xhxs_m03_silly", # Chipmunk 'en_male_narration', # narrator
"en_female_ht_f08_wonderful_world", # Dramatic 'en_male_funny', # wacky
) 'en_female_emotional', # peaceful
]
class TikTok: class TikTok:
"""TikTok Text-to-Speech Wrapper""" """TikTok Text-to-Speech Wrapper"""
@ -90,76 +89,86 @@ class TikTok:
self.max_chars = 200 self.max_chars = 200
self._session = requests.Session() self._session = requests.Session()
# set the headers to the session, so we don't have to do it for every request
self._session.headers = headers self._session.headers = headers
def run(self, text: str, filepath: str, random_voice: bool = False): def run(self, text: str, filepath: str, random_voice: bool = False, play_sound: bool = False):
if random_voice: if random_voice:
voice = self.random_voice() voice = self.random_voice()
else: else:
# if tiktok_voice is not set in the config file, then use a random voice
voice = settings.config["settings"]["tts"].get("tiktok_voice", None) voice = settings.config["settings"]["tts"].get("tiktok_voice", None)
# get the audio from the TikTok API chunks = self._split_text(text)
data = self.get_voices(voice=voice, text=text)
for entry in ENDPOINT_DATA:
endpoint_valid = True
audio_data = ["" for _ in range(len(chunks))]
# check if there was an error in the request def generate_audio_chunk(index: int, chunk: str) -> None:
status_code = data["status_code"] nonlocal endpoint_valid
if status_code != 0:
raise TikTokTTSException(status_code, data["message"]) if not endpoint_valid:
return
# decode data from base64 to binary
try: try:
raw_voices = data["data"]["v_str"] response = requests.post(
except: entry["url"],
print( json={
"The TikTok TTS returned an invalid response. Please try again later, and report this bug." "text": chunk,
"voice": voice
}
) )
raise TikTokTTSException(0, "Invalid response")
decoded_voices = base64.b64decode(raw_voices)
# write voices to specified filepath if response.status_code == 200:
with open(filepath, "wb") as out: audio_data[index] = response.json()[entry["response"]]
out.write(decoded_voices) else:
endpoint_valid = False
def get_voices(self, text: str, voice: Optional[str] = None) -> dict: except requests.RequestException as e:
"""If voice is not passed, the API will try to use the most fitting voice""" print(f"Error: {e}")
# sanitize text sys.exit()
text = text.replace("+", "plus").replace("&", "and").replace("r/", "")
# prepare url request threads = []
params = {"req_text": text, "speaker_map_type": 0, "aid": 1233} for index, chunk in enumerate(chunks):
thread = Thread(target=generate_audio_chunk, args=(index, chunk))
threads.append(thread)
thread.start()
if voice is not None: for thread in threads:
params["text_speaker"] = voice thread.join()
# send request if not endpoint_valid:
try: continue
response = self._session.post(self.URI_BASE, params=params)
except ConnectionError:
time.sleep(random.randrange(1, 7))
response = self._session.post(self.URI_BASE, params=params)
return response.json() audio_bytes = base64.b64decode("".join(audio_data))
@staticmethod with open(filepath, "wb") as file:
def random_voice() -> str: file.write(audio_bytes)
return random.choice(eng_voices) print(f"File '{filepath}' has been generated successfully.")
if play_sound:
playsound(filepath)
break
class TikTokTTSException(Exception): def _split_text(self, text: str) -> list[str]:
def __init__(self, code: int, message: str): merged_chunks: list[str] = []
self._code = code seperated_chunks: list[str] = re.findall(r'.*?[.,!?:;-]|.+', text)
self._message = message
def __str__(self) -> str: for i, chunk in enumerate(seperated_chunks):
if self._code == 1: if len(chunk) > 300:
return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}" seperated_chunks[i:i+1] = re.findall(r'.*?[ ]|.+', chunk)
if self._code == 2: merged_chunk = ""
return f"Code: {self._code}, reason: the text is too long, message: {self._message}" for seperated_chunk in seperated_chunks:
if len(merged_chunk) + len(seperated_chunk) <= 300:
merged_chunk += seperated_chunk
else:
merged_chunks.append(merged_chunk)
merged_chunk = seperated_chunk
if self._code == 4: merged_chunks.append(merged_chunk)
return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}" return merged_chunks
return f"Code: {self._message}, reason: unknown, message: {self._message}" @staticmethod
def random_voice() -> str:
return random.choice(VOICES)

@ -21,3 +21,4 @@ transformers==4.29.2
ffmpeg-python==0.2.0 ffmpeg-python==0.2.0
elevenlabs==0.2.17 elevenlabs==0.2.17
yt-dlp==2023.7.6 yt-dlp==2023.7.6
playsound==1.2.2
Loading…
Cancel
Save