"""TikTok text-to-speech wrapper: voice tables and the ``TTTTSWrapper`` client."""
import base64
import os
import random
import re
from urllib.parse import quote

import requests
import sox
from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
from moviepy.audio.io.AudioFileClip import AudioFileClip
from requests.adapters import HTTPAdapter, Retry

# from profanity_filter import ProfanityFilter
# pf = ProfanityFilter()
# Code by @JasonLovesDoggo
# https://twitter.com/scanlime/status/1512598559769702406

nonhuman = [
    # DISNEY VOICES
    "en_us_ghostface",  # Ghost Face
    "en_us_chewbacca",  # Chewbacca
    "en_us_c3po",  # C3PO
    "en_us_stitch",  # Stitch
    "en_us_stormtrooper",  # Stormtrooper
    "en_us_rocket",  # Rocket
]
human = [
    # ENGLISH VOICES
    "en_au_001",  # English AU - Female
    "en_au_002",  # English AU - Male
    "en_uk_001",  # English UK - Male 1
    "en_uk_003",  # English UK - Male 2
    "en_us_001",  # English US - Female (Int. 1)
    "en_us_002",  # English US - Female (Int. 2)
    "en_us_006",  # English US - Male 1
    "en_us_007",  # English US - Male 2
    "en_us_009",  # English US - Male 3
    "en_us_010",
]
voices = nonhuman + human

noneng = [
    "fr_001",  # French - Male 1
    "fr_002",  # French - Male 2
    "de_001",  # German - Female
    "de_002",  # German - Male
    "es_002",  # Spanish - Male
    # AMERICA VOICES
    "es_mx_002",  # Spanish MX - Male
    "br_001",  # Portuguese BR - Female 1
    "br_003",  # Portuguese BR - Female 2
    "br_004",  # Portuguese BR - Female 3
    "br_005",  # Portuguese BR - Male
    # ASIA VOICES
    "id_001",  # Indonesian - Female
    "jp_001",  # Japanese - Female 1
    "jp_003",  # Japanese - Female 2
    "jp_005",  # Japanese - Female 3
    "jp_006",  # Japanese - Male
    "kr_002",  # Korean - Male 1
    "kr_003",  # Korean - Female
    "kr_004",  # Korean - Male 2
]

# good_voices = {'good': ['en_us_002', 'en_us_006'],
#                'ok': ['en_au_002', 'en_uk_001']}  # less en_us_stormtrooper more less en_us_rocket en_us_ghostface

# Splits the (already "+"-joined) text into pieces: up to 200 characters
# followed by either a literal "." or the last character of the text.
_CHUNK_PATTERN = re.compile(r" *((.{0,200})(\.|.$))")


class TTTTSWrapper:  # TikTok Text-to-Speech Wrapper
    """Client that turns text into an mp3 file through the TikTok TTS endpoint."""

    def __init__(self):
        self.URI_BASE = "https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/?text_speaker="

    def tts(
        self,
        req_text: str = "TikTok Text To Speech",
        filename: str = "title.mp3",
        random_speaker: bool = False,
        censer=False,
    ):
        """Synthesize ``req_text`` and write the resulting audio to ``filename``.

        The text is split into chunks, each chunk is requested separately and
        saved as ``<filename minus .mp3>-<n>.mp3``. Several chunk files are
        concatenated into ``filename`` with sox (falling back to moviepy when
        sox raises ``SoxError``); a single chunk file is renamed to ``filename``.

        Args:
            req_text: Text to speak. "+" becomes "plus", "&" becomes "and" and
                spaces become "+" before the request is built.
            filename: Output path; chunk file names are derived from it by
                replacing ".mp3".
            random_speaker: When true, pick the voice with ``randomvoice()``.
                Otherwise use the ``VOICE`` environment variable, or a random
                entry of ``human`` when it is unset or empty.
            censer: Currently a no-op; the profanity filter call is
                commented out.

        Raises:
            IndexError: If no chunk could be extracted from the text
                (for example when ``req_text`` is empty).
        """
        req_text = req_text.replace("+", "plus").replace(" ", "+").replace("&", "and")
        if censer:
            # req_text = pf.censor(req_text)
            pass

        if random_speaker:
            voice = self.randomvoice()
        else:
            voice = os.getenv("VOICE") or random.choice(human)

        chunks = [m.group().strip() for m in _CHUNK_PATTERN.finditer(req_text)]

        cbn = sox.Combiner()
        cbn.set_input_format(file_type=["mp3"])

        audio_clips = []
        for chunk_id, chunk in enumerate(chunks):
            # Fetch first so a failed request does not leave an empty file behind.
            audio_bytes = self._fetch_chunk(voice, chunk)
            chunk_path = filename.replace(".mp3", f"-{chunk_id}.mp3")
            with open(chunk_path, "wb") as out:
                out.write(audio_bytes)
            audio_clips.append(chunk_path)

        try:
            if len(audio_clips) > 1:
                cbn.convert(samplerate=44100, n_channels=2)
                cbn.build(audio_clips, filename, "concatenate")
            else:
                os.rename(audio_clips[0], filename)
        except sox.core.SoxError:
            # sox could not concatenate the chunks; do it with moviepy instead.
            file_clips = [AudioFileClip(path) for path in audio_clips]
            try:
                audio_concat = concatenate_audioclips(file_clips)
                audio_composite = CompositeAudioClip([audio_concat])
                audio_composite.write_audiofile(filename, 44100, 2, 2000, None)
            finally:
                # Release the readers opened for the chunk files.
                for clip in file_clips:
                    clip.close()

    def _fetch_chunk(self, voice: str, chunk: str) -> bytes:
        """Request one text chunk from the endpoint and return the decoded audio bytes."""
        # Percent-encode the text so characters such as "#" cannot cut the
        # query short; "+" is kept because it stands for a space here.
        encoded_text = quote(chunk, safe="+")
        url = f"{self.URI_BASE}{voice}&req_text={encoded_text}&speaker_map_type=0"
        try:
            r = requests.post(url)
        except requests.exceptions.SSLError:
            # https://stackoverflow.com/a/47475019/18516611
            with requests.Session() as session:
                retry = Retry(connect=3, backoff_factor=0.5)
                adapter = HTTPAdapter(max_retries=retry)
                session.mount("http://", adapter)
                session.mount("https://", adapter)
                r = session.post(url)
        # The audio is returned base64-encoded under data.v_str.
        return base64.b64decode(r.json()["data"]["v_str"])

    @staticmethod
    def randomvoice():
        """Return a random voice id.

        One call in ten draws from ``voices`` (non-human and human voices);
        the rest draw from ``human`` only.
        """
        if random.randrange(10) == 0:  # 1/10 chance of ok voice
            return random.choice(voices)
        return random.choice(human)  # 9/10 chance of good voice