Made it so TikTok TTS can handle text longer than 300 characters

pull/2049/head
Alex 1 year ago
parent 340762e1b6
commit aebf2262c9

@ -1,80 +1,79 @@
# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki
import base64
import random
import time
from typing import Optional, Final
import requests
import requests, base64, re, sys
from threading import Thread
from playsound import playsound
from utils import settings
__all__ = ["TikTok", "TikTokTTSException"]
# Voice ids for the character voices; the trailing comment on each entry is
# the human-readable name of the voice.
disney_voices: Final[tuple] = (
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
    "en_female_madam_leota",  # Madame Leota
    "en_male_ghosthost",  # Ghost Host
    "en_male_pirate",  # Pirate
)
# Voice ids for the English-language voices (regional accents first, then the
# named styles); the trailing comment on each entry is its display name.
eng_voices: Final[tuple] = (
    # Regional accents
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",  # English US - Male 4
    # Named styles
    "en_male_narration",  # Narrator
    "en_male_funny",  # Funny
    "en_female_emotional",  # Peaceful
    "en_male_cody",  # Serious
)
# Voice ids for the non-English voices, grouped by region; the trailing
# comment on each entry is its display name.
non_eng_voices: Final[tuple] = (
    # Western European voices
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    "it_male_m18",  # Italian - Male
    # South American voices
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # Asian voices
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
)
# Voice ids for the singing / novelty vocal styles; the trailing comment on
# each entry is its display name.
vocals: Final[tuple] = (
    "en_female_f08_salut_damour",  # Alto
    "en_male_m03_lobby",  # Tenor
    "en_male_m03_sunshine_soon",  # Sunshine Soon
    "en_female_f08_warmy_breeze",  # Warmy Breeze
    "en_female_ht_f08_glorious",  # Glorious
    "en_male_sing_funny_it_goes_up",  # It Goes Up
    "en_male_m2_xhxs_m03_silly",  # Chipmunk
    "en_female_ht_f08_wonderful_world",  # Dramatic
)
# TTS endpoints, listed in the order they are tried.
#   "url"      - address the text/voice JSON payload is POSTed to
#   "response" - key in the JSON reply that holds the base64 audio string
ENDPOINT_DATA = [
    {"url": "https://tiktok-tts.weilnet.workers.dev/api/generation", "response": "data"},
    {"url": "https://countik.com/api/text/speech", "response": "v_data"},
    {"url": "https://gesserit.co/api/tiktok-tts", "response": "base64"},
]
# Voice ids available for text-to-speech conversion, grouped by category.
# The trailing comment on each entry is its display name.
VOICES = [
    # Disney voices
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
    # English voices
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",  # English US - Male 4
    # Europe voices
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    # America voices
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # Asia voices
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
    # Singing voices
    "en_female_f08_salut_damour",  # Alto
    "en_male_m03_lobby",  # Tenor
    "en_female_f08_warmy_breeze",  # Warmy Breeze
    "en_male_m03_sunshine_soon",  # Sunshine Soon
    # Other
    "en_male_narration",  # Narrator
    "en_male_funny",  # Wacky
    "en_female_emotional",  # Peaceful
]
class TikTok:
"""TikTok Text-to-Speech Wrapper"""
@ -90,76 +89,86 @@ class TikTok:
self.max_chars = 200
self._session = requests.Session()
# set the headers to the session, so we don't have to do it for every request
self._session.headers = headers
def run(self, text: str, filepath: str, random_voice: bool = False):
def run(self, text: str, filepath: str, random_voice: bool = False, play_sound: bool = False):
if random_voice:
voice = self.random_voice()
else:
# if tiktok_voice is not set in the config file, then use a random voice
voice = settings.config["settings"]["tts"].get("tiktok_voice", None)
# get the audio from the TikTok API
data = self.get_voices(voice=voice, text=text)
chunks = self._split_text(text)
for entry in ENDPOINT_DATA:
endpoint_valid = True
audio_data = ["" for _ in range(len(chunks))]
# check if there was an error in the request
status_code = data["status_code"]
if status_code != 0:
raise TikTokTTSException(status_code, data["message"])
def generate_audio_chunk(index: int, chunk: str) -> None:
nonlocal endpoint_valid
if not endpoint_valid:
return
# decode data from base64 to binary
try:
raw_voices = data["data"]["v_str"]
except:
print(
"The TikTok TTS returned an invalid response. Please try again later, and report this bug."
response = requests.post(
entry["url"],
json={
"text": chunk,
"voice": voice
}
)
raise TikTokTTSException(0, "Invalid response")
decoded_voices = base64.b64decode(raw_voices)
# write voices to specified filepath
with open(filepath, "wb") as out:
out.write(decoded_voices)
if response.status_code == 200:
audio_data[index] = response.json()[entry["response"]]
else:
endpoint_valid = False
except requests.RequestException as e:
print(f"Error: {e}")
sys.exit()
def get_voices(self, text: str, voice: Optional[str] = None) -> dict:
"""If voice is not passed, the API will try to use the most fitting voice"""
# sanitize text
text = text.replace("+", "plus").replace("&", "and").replace("r/", "")
threads = []
for index, chunk in enumerate(chunks):
thread = Thread(target=generate_audio_chunk, args=(index, chunk))
threads.append(thread)
thread.start()
# prepare url request
params = {"req_text": text, "speaker_map_type": 0, "aid": 1233}
for thread in threads:
thread.join()
if voice is not None:
params["text_speaker"] = voice
if not endpoint_valid:
continue
# send request
try:
response = self._session.post(self.URI_BASE, params=params)
except ConnectionError:
time.sleep(random.randrange(1, 7))
response = self._session.post(self.URI_BASE, params=params)
audio_bytes = base64.b64decode("".join(audio_data))
return response.json()
with open(filepath, "wb") as file:
file.write(audio_bytes)
print(f"File '{filepath}' has been generated successfully.")
@staticmethod
def random_voice() -> str:
    """Return one voice id picked at random from ``eng_voices``."""
    picked = random.choice(eng_voices)
    return picked
if play_sound:
playsound(filepath)
break
class TikTokTTSException(Exception):
def __init__(self, code: int, message: str):
self._code = code
self._message = message
def _split_text(self, text: str) -> list[str]:
merged_chunks: list[str] = []
seperated_chunks: list[str] = re.findall(r'.*?[.,!?:;-]|.+', text)
def __str__(self) -> str:
if self._code == 1:
return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}"
for i, chunk in enumerate(seperated_chunks):
if len(chunk) > 300:
seperated_chunks[i:i+1] = re.findall(r'.*?[ ]|.+', chunk)
if self._code == 2:
return f"Code: {self._code}, reason: the text is too long, message: {self._message}"
merged_chunk = ""
for seperated_chunk in seperated_chunks:
if len(merged_chunk) + len(seperated_chunk) <= 300:
merged_chunk += seperated_chunk
else:
merged_chunks.append(merged_chunk)
merged_chunk = seperated_chunk
if self._code == 4:
return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}"
merged_chunks.append(merged_chunk)
return merged_chunks
return f"Code: {self._message}, reason: unknown, message: {self._message}"
@staticmethod
def random_voice() -> str:
    """Return one voice id picked at random from ``VOICES``."""
    picked = random.choice(VOICES)
    return picked

@ -21,3 +21,4 @@ transformers==4.29.2
ffmpeg-python==0.2.0
elevenlabs==0.2.17
yt-dlp==2023.7.6
playsound==1.2.2
Loading…
Cancel
Save