I think I fixed Polly; it still needs testing, though

4 years ago · c58efb23fc
parent 7eb8ace07e
commit c58efb23fc
3 changed files with 121 additions and 1 deletions
--- a/TTS/POLLY.py
+++ b/TTS/POLLY.py
@ -0,0 +1,115 @@
 import os
 import random
 import re
 import requests
 import sox
 from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
 from moviepy.audio.io.AudioFileClip import AudioFileClip
 # valid voices https://lazypy.ro/tts/
 # Voice names accepted by the service, keyed by the 'neural' and 'standard' groups.
 voices = {
     "neural": [
         "Ivy", "Joanna", "Kendra", "Kimberly", "Salli",
         "Joey", "Justin", "Matthew",
         "Amy", "Emma", "Brian",
     ],
     "standard": [
         "Ivy", "Joanna", "Kendra", "Kimberly", "Salli",
         "Joey", "Justin", "Matthew",
         "Russell", "Nicole",
         "Amy", "Emma", "Brian",
         "Aditi", "Raveena", "Geraint",
     ],
 }
 class POLLY:
     """Text-to-speech client for the Streamlabs Polly endpoint.

     ``tts`` posts text to ``self.url``, downloads the audio the service
     points to and writes it to disk. Text the service rejects as too long is
     split into fragments that are synthesised one by one and then joined.
     """

     # Upper bound on the length of one fragment when over-long text is split.
     max_chars = 300

     def __init__(self):
         self.url = 'https://streamlabs.com/polly/speak'

     def tts(
             self,
             req_text: str = "Amazon Text To Speech",
             filename: str = "title.mp3",
             random_speaker=False,
             censer=False,
     ):
         """Synthesise ``req_text`` and write the audio to ``filename``.

         Args:
             req_text: Text to speak. It is sent as given; ``make_readable``
                 is not applied automatically.
             filename: Path of the audio file to write.
             random_speaker: When true, pick the voice with ``randomvoice``;
                 otherwise read it from the ``VOICE`` environment variable.
             censer: Accepted for interface compatibility; currently unused.

         Raises:
             ValueError: ``random_speaker`` is false and ``VOICE`` is unset
                 or empty.
             RuntimeError: the service returned neither a ``speak_url`` nor
                 the "text too long" error.
         """
         if random_speaker:
             voice = self.randomvoice()
         else:
             voice = os.getenv("VOICE")
             if not voice:
                 # Raise (not return) so the caller cannot miss the problem.
                 raise ValueError('Please set the environment variable VOICE to a valid voice. options are: {}'.format(voices))

         body = {'voice': voice, 'text': req_text}
         payload = requests.post(self.url, data=body).json()
         if 'speak_url' in payload:
             self._download(payload['speak_url'], filename)
             return

         error = payload.get('error')
         if error != 'Text length is too long!':
             # Any other failure used to be swallowed, leaving no output file.
             raise RuntimeError('Polly request failed: {}'.format(error or payload))

         # The text is too long for one request: synthesise it piecewise,
         # keeping the voice that was selected above for every fragment.
         root, ext = os.path.splitext(filename)
         audio_clips = []
         for chunk_id, chunk in enumerate(self._split_text(req_text, self.max_chars)):
             chunk_path = f"{root}-{chunk_id}{ext}"
             resp = requests.post(self.url, data={'voice': voice, 'text': chunk})
             self._download(resp.json()['speak_url'], chunk_path)
             audio_clips.append(chunk_path)

         try:
             if len(audio_clips) > 1:
                 cbn = sox.Combiner()
                 cbn.convert(samplerate=44100, n_channels=2)
                 cbn.build(audio_clips, filename, "concatenate")
             else:
                 # os.replace overwrites an existing target on every platform.
                 os.replace(audio_clips[0], filename)
         except (sox.core.SoxError,
                 FileNotFoundError):  # https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/67#issuecomment-1150466339
             # Fall back to moviepy when sox cannot join the fragments.
             clips = [AudioFileClip(path) for path in audio_clips]
             audio_concat = concatenate_audioclips(clips)
             audio_composite = CompositeAudioClip([audio_concat])
             audio_composite.write_audiofile(filename, 44100, 2, 2000, None)

     @staticmethod
     def _download(url, path):
         """Fetch ``url`` and write the response body to ``path``."""
         voice_data = requests.get(url)
         with open(path, 'wb') as f:
             f.write(voice_data.content)

     @staticmethod
     def _split_text(text, limit=300):
         """Split ``text`` into non-empty fragments of at most ``limit`` characters.

         Each fragment ends just after the last period that fits, else at the
         last whitespace that fits, else exactly at ``limit``. No text is
         discarded apart from whitespace at fragment boundaries.
         """
         chunks = []
         remaining = text.strip()
         while remaining:
             if len(remaining) <= limit:
                 chunks.append(remaining)
                 break
             window = remaining[:limit]
             cut = window.rfind('.') + 1
             if cut == 0:
                 # No sentence end fits: break before the last word instead,
                 # or mid-word when the window contains no whitespace at all.
                 cut = len(window.rsplit(None, 1)[0])
             chunks.append(remaining[:cut].strip())
             remaining = remaining[cut:].strip()
         return chunks

     def make_readable(self, text):
         """
         Amazon Polly fails to read some symbols properly such as '& (and)'.
         So we normalize input text before passing it to the service
         """
         text = text.replace('&', 'and')
         return text

     def randomvoice(self):
         """Return a random voice name from the neural and standard lists combined.

         The two lists are concatenated, so a name present in both is twice
         as likely to be chosen as a name present in only one.
         """
         valid = voices['neural'] + voices['standard']
         return random.choice(valid)
--- a/TTS/TikTok.py
+++ b/TTS/TikTok.py
@ -110,6 +110,7 @@ class TikTok:  # TikTok Text-to-Speech Wrapper
                r = session.post(
                    f"{self.URI_BASE}{voice}&req_text={chunk}&speaker_map_type=0"
                )
            print(r.text)
            vstr = [r.json()["data"]["v_str"]][0]
            b64d = base64.b64decode(vstr)
@ -141,3 +142,6 @@ class TikTok:  # TikTok Text-to-Speech Wrapper
        if ok_or_good == 1:  # 1/10 chance of ok voice
            return random.choice(voices)
        return random.choice(human)  # 9/10 chance of good voice
 if __name__ == "__main__":
     # Manual smoke test. Guarded so that importing this module (as
     # TTS/swapper.py does) no longer runs a TTS request as a side effect.
     TikTok().tts('Hello World', '../TTS/hello.mp3')
--- a/TTS/swapper.py
+++ b/TTS/swapper.py
@ -3,9 +3,10 @@ from os import getenv
 from dotenv import load_dotenv
 from TTS.GTTS import GTTS
 from TTS.POLLY import POLLY
 from TTS.TikTok import TikTok
-CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS}
+CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS, 'polly': POLLY}
 class TTS: