Made it so TikTok TTS can handle text longer than 300 characters

pull/2049/head
Alex 1 year ago
parent 340762e1b6
commit aebf2262c9

@ -1,80 +1,79 @@
# documentation for tiktok api: https://github.com/oscie57/tiktok-voice/wiki
import base64
import random import random
import time import time
from typing import Optional, Final from typing import Optional, Final
import requests, base64, re, sys
import requests from threading import Thread
from playsound import playsound
from utils import settings from utils import settings
__all__ = ["TikTok", "TikTokTTSException"] # define the endpoint data with URLs and corresponding response keys
ENDPOINT_DATA = [
disney_voices: Final[tuple] = ( {
"en_us_ghostface", # Ghost Face "url": "https://tiktok-tts.weilnet.workers.dev/api/generation",
"en_us_chewbacca", # Chewbacca "response": "data"
"en_us_c3po", # C3PO },
"en_us_stitch", # Stitch {
"en_us_stormtrooper", # Stormtrooper "url": "https://countik.com/api/text/speech",
"en_us_rocket", # Rocket "response": "v_data"
"en_female_madam_leota", # Madame Leota },
"en_male_ghosthost", # Ghost Host {
"en_male_pirate", # pirate "url": "https://gesserit.co/api/tiktok-tts",
) "response": "base64"
}
eng_voices: Final[tuple] = ( ]
"en_au_001", # English AU - Female
"en_au_002", # English AU - Male # define available voices for text-to-speech conversion
"en_uk_001", # English UK - Male 1 VOICES = [
"en_uk_003", # English UK - Male 2 # DISNEY VOICES
"en_us_001", # English US - Female (Int. 1) 'en_us_ghostface', # Ghost Face
"en_us_002", # English US - Female (Int. 2) 'en_us_chewbacca', # Chewbacca
"en_us_006", # English US - Male 1 'en_us_c3po', # C3PO
"en_us_007", # English US - Male 2 'en_us_stitch', # Stitch
"en_us_009", # English US - Male 3 'en_us_stormtrooper', # Stormtrooper
"en_us_010", # English US - Male 4 'en_us_rocket', # Rocket
"en_male_narration", # Narrator # ENGLISH VOICES
"en_male_funny", # Funny 'en_au_001', # English AU - Female
"en_female_emotional", # Peaceful 'en_au_002', # English AU - Male
"en_male_cody", # Serious 'en_uk_001', # English UK - Male 1
) 'en_uk_003', # English UK - Male 2
'en_us_001', # English US - Female (Int. 1)
non_eng_voices: Final[tuple] = ( 'en_us_002', # English US - Female (Int. 2)
# Western European voices 'en_us_006', # English US - Male 1
"fr_001", # French - Male 1 'en_us_007', # English US - Male 2
"fr_002", # French - Male 2 'en_us_009', # English US - Male 3
"de_001", # German - Female 'en_us_010', # English US - Male 4
"de_002", # German - Male # EUROPE VOICES
"es_002", # Spanish - Male 'fr_001', # French - Male 1
"it_male_m18", # Italian - Male 'fr_002', # French - Male 2
# South american voices 'de_001', # German - Female
"es_mx_002", # Spanish MX - Male 'de_002', # German - Male
"br_001", # Portuguese BR - Female 1 'es_002', # Spanish - Male
"br_003", # Portuguese BR - Female 2 # AMERICA VOICES
"br_004", # Portuguese BR - Female 3 'es_mx_002', # Spanish MX - Male
"br_005", # Portuguese BR - Male 'br_001', # Portuguese BR - Female 1
# asian voices 'br_003', # Portuguese BR - Female 2
"id_001", # Indonesian - Female 'br_004', # Portuguese BR - Female 3
"jp_001", # Japanese - Female 1 'br_005', # Portuguese BR - Male
"jp_003", # Japanese - Female 2 # ASIA VOICES
"jp_005", # Japanese - Female 3 'id_001', # Indonesian - Female
"jp_006", # Japanese - Male 'jp_001', # Japanese - Female 1
"kr_002", # Korean - Male 1 'jp_003', # Japanese - Female 2
"kr_003", # Korean - Female 'jp_005', # Japanese - Female 3
"kr_004", # Korean - Male 2 'jp_006', # Japanese - Male
) 'kr_002', # Korean - Male 1
'kr_003', # Korean - Female
vocals: Final[tuple] = ( 'kr_004', # Korean - Male 2
"en_female_f08_salut_damour", # Alto # SINGING VOICES
"en_male_m03_lobby", # Tenor 'en_female_f08_salut_damour', # Alto
"en_male_m03_sunshine_soon", # Sunshine Soon 'en_male_m03_lobby', # Tenor
"en_female_f08_warmy_breeze", # Warmy Breeze 'en_female_f08_warmy_breeze', # Warmy Breeze
"en_female_ht_f08_glorious", # Glorious 'en_male_m03_sunshine_soon', # Sunshine Soon
"en_male_sing_funny_it_goes_up", # It Goes Up # OTHER
"en_male_m2_xhxs_m03_silly", # Chipmunk 'en_male_narration', # narrator
"en_female_ht_f08_wonderful_world", # Dramatic 'en_male_funny', # wacky
) 'en_female_emotional', # peaceful
]
class TikTok: class TikTok:
"""TikTok Text-to-Speech Wrapper""" """TikTok Text-to-Speech Wrapper"""
@ -90,76 +89,86 @@ class TikTok:
self.max_chars = 200 self.max_chars = 200
self._session = requests.Session() self._session = requests.Session()
# set the headers to the session, so we don't have to do it for every request
self._session.headers = headers self._session.headers = headers
def run(self, text: str, filepath: str, random_voice: bool = False): def run(self, text: str, filepath: str, random_voice: bool = False, play_sound: bool = False):
if random_voice: if random_voice:
voice = self.random_voice() voice = self.random_voice()
else: else:
# if tiktok_voice is not set in the config file, then use a random voice
voice = settings.config["settings"]["tts"].get("tiktok_voice", None) voice = settings.config["settings"]["tts"].get("tiktok_voice", None)
# get the audio from the TikTok API chunks = self._split_text(text)
data = self.get_voices(voice=voice, text=text)
# check if there was an error in the request for entry in ENDPOINT_DATA:
status_code = data["status_code"] endpoint_valid = True
if status_code != 0: audio_data = ["" for _ in range(len(chunks))]
raise TikTokTTSException(status_code, data["message"])
# decode data from base64 to binary def generate_audio_chunk(index: int, chunk: str) -> None:
try: nonlocal endpoint_valid
raw_voices = data["data"]["v_str"]
except:
print(
"The TikTok TTS returned an invalid response. Please try again later, and report this bug."
)
raise TikTokTTSException(0, "Invalid response")
decoded_voices = base64.b64decode(raw_voices)
# write voices to specified filepath if not endpoint_valid:
with open(filepath, "wb") as out: return
out.write(decoded_voices)
def get_voices(self, text: str, voice: Optional[str] = None) -> dict: try:
"""If voice is not passed, the API will try to use the most fitting voice""" response = requests.post(
# sanitize text entry["url"],
text = text.replace("+", "plus").replace("&", "and").replace("r/", "") json={
"text": chunk,
"voice": voice
}
)
# prepare url request if response.status_code == 200:
params = {"req_text": text, "speaker_map_type": 0, "aid": 1233} audio_data[index] = response.json()[entry["response"]]
else:
endpoint_valid = False
if voice is not None: except requests.RequestException as e:
params["text_speaker"] = voice print(f"Error: {e}")
sys.exit()
# send request threads = []
try: for index, chunk in enumerate(chunks):
response = self._session.post(self.URI_BASE, params=params) thread = Thread(target=generate_audio_chunk, args=(index, chunk))
except ConnectionError: threads.append(thread)
time.sleep(random.randrange(1, 7)) thread.start()
response = self._session.post(self.URI_BASE, params=params)
return response.json() for thread in threads:
thread.join()
@staticmethod if not endpoint_valid:
def random_voice() -> str: continue
return random.choice(eng_voices)
audio_bytes = base64.b64decode("".join(audio_data))
with open(filepath, "wb") as file:
file.write(audio_bytes)
print(f"File '{filepath}' has been generated successfully.")
class TikTokTTSException(Exception): if play_sound:
def __init__(self, code: int, message: str): playsound(filepath)
self._code = code
self._message = message
def __str__(self) -> str: break
if self._code == 1:
return f"Code: {self._code}, reason: probably the aid value isn't correct, message: {self._message}"
if self._code == 2: def _split_text(self, text: str) -> list[str]:
return f"Code: {self._code}, reason: the text is too long, message: {self._message}" merged_chunks: list[str] = []
seperated_chunks: list[str] = re.findall(r'.*?[.,!?:;-]|.+', text)
if self._code == 4: for i, chunk in enumerate(seperated_chunks):
return f"Code: {self._code}, reason: the speaker doesn't exist, message: {self._message}" if len(chunk) > 300:
seperated_chunks[i:i+1] = re.findall(r'.*?[ ]|.+', chunk)
return f"Code: {self._message}, reason: unknown, message: {self._message}" merged_chunk = ""
for seperated_chunk in seperated_chunks:
if len(merged_chunk) + len(seperated_chunk) <= 300:
merged_chunk += seperated_chunk
else:
merged_chunks.append(merged_chunk)
merged_chunk = seperated_chunk
merged_chunks.append(merged_chunk)
return merged_chunks
@staticmethod
def random_voice() -> str:
return random.choice(VOICES)

@ -20,4 +20,5 @@ torch==2.0.1
transformers==4.29.2 transformers==4.29.2
ffmpeg-python==0.2.0 ffmpeg-python==0.2.0
elevenlabs==0.2.17 elevenlabs==0.2.17
yt-dlp==2023.7.6 yt-dlp==2023.7.6
playsound==1.2.2
Loading…
Cancel
Save