Made it so TikTok TTS can handle text longer than 300 characters

pull/2049/head
Alex 1 year ago
parent 340762e1b6
commit aebf2262c9

@ -1,80 +1,79 @@
# Documentation for the TikTok API: https://github.com/oscie57/tiktok-voice/wiki
import base64
import random
import time
from typing import Optional, Final
import requests
import requests, base64, re, sys
from threading import Thread
from playsound import playsound
from utils import settings
__all__ = ["TikTok", "TikTokTTSException"]
# Character voice identifiers; the trailing comment on each entry is its display name.
disney_voices: Final[tuple] = (
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
    "en_female_madam_leota",  # Madame Leota
    "en_male_ghosthost",  # Ghost Host
    "en_male_pirate",  # Pirate
)
# English-language voice identifiers, grouped by accent and then by named style.
eng_voices: Final[tuple] = (
    # Australian
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    # British
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    # American
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",  # English US - Male 4
    # Named styles
    "en_male_narration",  # Narrator
    "en_male_funny",  # Funny
    "en_female_emotional",  # Peaceful
    "en_male_cody",  # Serious
)
# Voice identifiers for languages other than English, grouped by region.
non_eng_voices: Final[tuple] = (
    # Western European voices
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    "it_male_m18",  # Italian - Male
    # South American voices
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # Asian voices
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
)
# Vocal-style voice identifiers; the trailing comment on each entry is its display name.
vocals: Final[tuple] = (
    "en_female_f08_salut_damour",  # Alto
    "en_male_m03_lobby",  # Tenor
    "en_male_m03_sunshine_soon",  # Sunshine Soon
    "en_female_f08_warmy_breeze",  # Warmy Breeze
    "en_female_ht_f08_glorious",  # Glorious
    "en_male_sing_funny_it_goes_up",  # It Goes Up
    "en_male_m2_xhxs_m03_silly",  # Chipmunk
    "en_female_ht_f08_wonderful_world",  # Dramatic
)
# Each entry pairs an endpoint URL ("url") with the key under which that
# endpoint's JSON reply carries the audio payload ("response").
ENDPOINT_DATA = [
    {"url": endpoint_url, "response": payload_key}
    for endpoint_url, payload_key in (
        ("https://tiktok-tts.weilnet.workers.dev/api/generation", "data"),
        ("https://countik.com/api/text/speech", "v_data"),
        ("https://gesserit.co/api/tiktok-tts", "base64"),
    )
]
# Voice identifiers that can be requested for text-to-speech conversion,
# grouped by category; the trailing comment on each entry is its display name.
VOICES = [
    # Disney voices
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
    # English voices
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",  # English US - Male 4
    # Europe voices
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    # America voices
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # Asia voices
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
    # Singing voices
    "en_female_f08_salut_damour",  # Alto
    "en_male_m03_lobby",  # Tenor
    "en_female_f08_warmy_breeze",  # Warmy Breeze
    "en_male_m03_sunshine_soon",  # Sunshine Soon
    # Other
    "en_male_narration",  # Narrator
    "en_male_funny",  # Wacky
    "en_female_emotional",  # Peaceful
]
class TikTok:
"""TikTok Text-to-Speech Wrapper"""
@ -90,76 +89,86 @@ class TikTok:
self.max_chars = 200
self._session = requests.Session()
# set the headers to the session, so we don't have to do it for every request
self._session.headers = headers
# NOTE(review): two `run` signatures appear back to back and the statements
# below alternate between two different implementations (one using
# `self.get_voices`, one using `ENDPOINT_DATA` and threads). This looks like a
# diff rendering with removed and added lines interleaved and indentation
# stripped — confirm statement order against the real file before editing.
def run(self, text: str, filepath: str, random_voice: bool = False):
def run(self, text: str, filepath: str, random_voice: bool = False, play_sound: bool = False):
if random_voice:
voice = self.random_voice()
else:
# if tiktok_voice is not set in the config file, then use a random voice
voice = settings.config["settings"]["tts"].get("tiktok_voice", None)
# get the audio from the TikTok API
data = self.get_voices(voice=voice, text=text)
# split the text into chunks; each chunk is sent as its own request below
chunks = self._split_text(text)
# check if there was an error in the request
status_code = data["status_code"]
if status_code != 0:
raise TikTokTTSException(status_code, data["message"])
# try the endpoints in order; the `continue` / `break` further down move on to
# the next endpoint on failure and stop after the first one that wrote a file
for entry in ENDPOINT_DATA:
endpoint_valid = True
# one slot per chunk, filled by index so the pieces are joined in text order
audio_data = ["" for _ in range(len(chunks))]
# decode data from base64 to binary
try:
raw_voices = data["data"]["v_str"]
except:
print(
"The TikTok TTS returned an invalid response. Please try again later, and report this bug."
)
raise TikTokTTSException(0, "Invalid response")
decoded_voices = base64.b64decode(raw_voices)
# worker used as the Thread target below: requests the audio for one chunk
def generate_audio_chunk(index: int, chunk: str) -> None:
# flag shared with the enclosing loop body and the other workers
nonlocal endpoint_valid
# write voices to specified filepath
with open(filepath, "wb") as out:
out.write(decoded_voices)
if not endpoint_valid:
return
def get_voices(self, text: str, voice: Optional[str] = None) -> dict:
"""If voice is not passed, the API will try to use the most fitting voice"""
# sanitize text
text = text.replace("+", "plus").replace("&", "and").replace("r/", "")
try:
response = requests.post(
entry["url"],
json={
"text": chunk,
"voice": voice
}
)
# prepare url request
params = {"req_text": text, "speaker_map_type": 0, "aid": 1233}
# a 200 reply stores the payload found under this endpoint's response key;
# any other status marks the whole endpoint as unusable
if response.status_code == 200:
audio_data[index] = response.json()[entry["response"]]
else:
endpoint_valid = False
if voice is not None:
params["text_speaker"] = voice
except requests.RequestException as e:
print(f"Error: {e}")
# NOTE(review): this appears to run inside a worker thread (see the Thread
# target below); SystemExit raised in a non-main thread presumably ends only
# that thread, leaving endpoint_valid True and this chunk's slot empty —
# verify, and consider setting endpoint_valid = False instead.
sys.exit()
# send request
try:
response = self._session.post(self.URI_BASE, params=params)
except ConnectionError:
time.sleep(random.randrange(1, 7))
response = self._session.post(self.URI_BASE, params=params)
# start one thread per chunk, then wait for all of them to finish
threads = []
for index, chunk in enumerate(chunks):
thread = Thread(target=generate_audio_chunk, args=(index, chunk))
threads.append(thread)
thread.start()
return response.json()
for thread in threads:
thread.join()
@staticmethod
def random_voice() -> str:
return random.choice(eng_voices)
if not endpoint_valid:
continue
# NOTE(review): the per-chunk base64 strings are concatenated before decoding;
# this assumes no chunk's payload ends in '=' padding — verify, or decode each
# chunk separately and join the bytes.
audio_bytes = base64.b64decode("".join(audio_data))
with open(filepath, "wb") as file:
file.write(audio_bytes)
print(f"File '{filepath}' has been generated successfully.")
class TikTokTTSException(Exception):
def __init__(self, code: int, message: str):
self._code = code
self._message = message
# optionally play the generated file
if play_sound:
playsound(filepath)
def __str__(self) -> str:
if self._code == 1:
return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}"
break
if self._code == 2:
return f"Code: {self._code}, reason: the text is too long, message: {self._message}"
def _split_text(self, text: str, max_chars: int = 300) -> list[str]:
    """Split *text* into consecutive chunks of at most *max_chars* characters.

    The text is first cut after each punctuation mark (``. , ! ? : ; -``).
    A piece that is still too long is cut after each whitespace character,
    and a single word that is still too long is sliced at ``max_chars``.
    The pieces are then greedily packed back together, so concatenating the
    returned chunks always reproduces *text* exactly.

    Args:
        text: The text to split. May be empty.
        max_chars: Upper bound on the length of every returned chunk.

    Returns:
        The chunks in text order. Never contains an empty string; an empty
        *text* yields an empty list.

    Raises:
        ValueError: If *max_chars* is smaller than 1.
    """
    if max_chars < 1:
        raise ValueError("max_chars must be at least 1")

    # DOTALL lets "." match newlines, so line breaks stay inside the pieces
    # instead of being dropped (which would glue the neighbouring words).
    pieces: list[str] = []
    for sentence in re.findall(r".*?[.,!?:;-]|.+", text, flags=re.DOTALL):
        if len(sentence) <= max_chars:
            pieces.append(sentence)
            continue
        # Too long for one request: fall back to word granularity, and hard-slice
        # any single word that on its own still exceeds the limit.
        for word in re.findall(r".*?\s|.+", sentence, flags=re.DOTALL):
            pieces.extend(
                word[start:start + max_chars]
                for start in range(0, len(word), max_chars)
            )

    # Greedily pack the pieces; every piece is <= max_chars at this point, so
    # `current` is never empty when it is flushed inside the loop.
    chunks: list[str] = []
    current = ""
    for piece in pieces:
        if len(current) + len(piece) <= max_chars:
            current += piece
        else:
            chunks.append(current)
            current = piece
    if current:
        chunks.append(current)
    return chunks
@staticmethod
def random_voice() -> str:
    """Return one voice identifier picked at random from ``VOICES``."""
    picked: str = random.choice(VOICES)
    return picked

@ -21,3 +21,4 @@ transformers==4.29.2
ffmpeg-python==0.2.0
elevenlabs==0.2.17
yt-dlp==2023.7.6
playsound==1.2.2
Loading…
Cancel
Save