Merge pull request #523 from JasonLovesDoggo/master

polly added
4 years ago · be978f4d80
parent 4ef4082631 d9015df8ed
commit be978f4d80
5 changed files with 122 additions and 5 deletions
--- a/.env.template
+++ b/.env.template
@ -32,8 +32,8 @@ MAX_COMMENT_LENGTH="500"
 OPACITY="1"

 # see TTSwrapper.py for all valid options
-VOICE="en_us_001" # e.g. en_us_002
+VOICE="Matthew" # e.g. Matthew for polly, en_us_002 for TikTok
+TTsChoice="polly" # todo add docs

 # IN-PROGRESS - not yet implemented
-TTsChoice="TikTok" # todo add docs
 STORYMODE="False"
--- a/TTS/POLLY.py
+++ b/TTS/POLLY.py
@ -0,0 +1,115 @@
+import os
+import random
+import re
+
+import requests
+import sox
+from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+# Voice names grouped under the 'neural' and 'standard' keys.
+# POLLY.randomvoice() draws from the concatenation of both lists.
+voices = {
+    "neural": [
+        "Ivy",
+        "Joanna",
+        "Kendra",
+        "Kimberly",
+        "Salli",
+        "Joey",
+        "Justin",
+        "Matthew",
+        "Amy",
+        "Emma",
+        "Brian",
+    ],
+    "standard": [
+        "Ivy",
+        "Joanna",
+        "Kendra",
+        "Kimberly",
+        "Salli",
+        "Joey",
+        "Justin",
+        "Matthew",
+        "Russell",
+        "Nicole",
+        "Amy",
+        "Emma",
+        "Brian",
+        "Aditi",
+        "Raveena",
+        "Geraint",
+    ],
+}
+
+
+# Valid voices are listed at https://lazypy.ro/tts/
+
+
+class POLLY:
+    """Text-to-speech wrapper around the Streamlabs Polly endpoint.
+
+    Text is POSTed to ``self.url`` together with a voice name. The JSON
+    reply is expected to carry either a ``speak_url`` pointing at the
+    rendered audio or an ``error`` message.
+    """
+
+    def __init__(self):
+        self.url = 'https://streamlabs.com/polly/speak'
+
+    def tts(
+        self,
+        req_text: str = "Amazon Text To Speech",
+        filename: str = "title.mp3",
+        random_speaker=False,
+        censer=False,
+    ):
+        """Render ``req_text`` to speech and save the audio as ``filename``.
+
+        If the service rejects the text as too long, the text is split into
+        sentence-sized chunks, each chunk is rendered to its own file and
+        the pieces are concatenated into ``filename``.
+
+        Args:
+            req_text: Text to speak.
+            filename: Output path. Chunk files are named by replacing
+                ".mp3" in this path with "-<index>.mp3".
+            random_speaker: When true, use a random voice instead of the
+                one named by the VOICE environment variable.
+            censer: Currently unused by this method.
+
+        Raises:
+            ValueError: VOICE is not set and ``random_speaker`` is false.
+            RuntimeError: The service reported an error other than the
+                text being too long.
+        """
+        if random_speaker:
+            voice = self.randomvoice()
+        else:
+            if not os.getenv('VOICE'):
+                # Raise rather than return the exception object, so a
+                # missing VOICE cannot be mistaken for a successful call.
+                raise ValueError('Please set the environment variable VOICE to a valid voice. options are: {}'.format(voices))
+            voice = str(os.getenv("VOICE")).capitalize()
+
+        reply = self._request_speech(voice, req_text)
+        if 'speak_url' in reply:
+            self._download(reply['speak_url'], filename)
+            return
+        if reply.get('error') != 'Text length is too long!':
+            # No audio was produced; surface the reason instead of
+            # returning silently without writing the output file.
+            raise RuntimeError(
+                f"Polly TTS request failed: {reply.get('error', reply)}"
+            )
+
+        # Too long for a single request: cut into pieces of at most ~530
+        # characters, each ending at a full stop or at the end of the text.
+        chunks = [
+            m.group().strip() for m in re.finditer(r" *((.{0,530})(\.|.$))", req_text)
+        ]
+
+        audio_clips = []
+        for chunk_id, chunk in enumerate(chunks):
+            chunk_file = filename.replace(".mp3", f"-{chunk_id}.mp3")
+            # Use the same voice as the rest of the run for every chunk
+            # (it was previously hard-coded to 'Brian').
+            chunk_reply = self._request_speech(voice, chunk)
+            self._download(chunk_reply['speak_url'], chunk_file)
+            audio_clips.append(chunk_file)
+
+        self._concatenate(audio_clips, filename)
+
+    def _request_speech(self, voice, text):
+        """POST ``text`` and ``voice`` to the service and return the parsed JSON reply."""
+        body = {'voice': voice, 'text': text}
+        return requests.post(self.url, data=body).json()
+
+    def _download(self, speak_url, path):
+        """Fetch the audio at ``speak_url`` and write its bytes to ``path``."""
+        voice_data = requests.get(speak_url)
+        with open(path, 'wb') as out:
+            out.write(voice_data.content)
+
+    def _concatenate(self, audio_clips, filename):
+        """Join the chunk files in ``audio_clips`` into ``filename``.
+
+        sox is tried first; if it raises SoxError or FileNotFoundError the
+        clips are joined with moviepy instead.
+        """
+        cbn = sox.Combiner()
+        try:
+            if len(audio_clips) > 1:
+                cbn.convert(samplerate=44100, n_channels=2)
+                cbn.build(audio_clips, filename, "concatenate")
+            else:
+                os.rename(audio_clips[0], filename)
+        except (sox.core.SoxError,
+                FileNotFoundError):  # https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/67#issuecomment-1150466339
+            loaded_clips = [AudioFileClip(clip) for clip in audio_clips]
+            audio_concat = concatenate_audioclips(loaded_clips)
+            audio_composite = CompositeAudioClip([audio_concat])
+            audio_composite.write_audiofile(filename, 44100, 2, 2000, None)
+
+    def make_readable(self, text):
+        """
+        Amazon Polly fails to read some symbols properly such as '& (and)'.
+        This returns ``text`` with every '&' replaced by 'and'.
+        """
+        text = text.replace('&', 'and')
+        return text
+
+    def randomvoice(self):
+        """Return a random voice name drawn from the neural and standard lists."""
+        valid = voices['neural'] + voices['standard']
+        return random.choice(valid)
--- a/TTS/TikTok.py
+++ b/TTS/TikTok.py
@ -110,6 +110,7 @@ class TikTok:  # TikTok Text-to-Speech Wrapper
                r = session.post(
                    f"{self.URI_BASE}{voice}&req_text={chunk}&speaker_map_type=0"
                )
+            print(r.text)
            vstr = [r.json()["data"]["v_str"]][0]
            b64d = base64.b64decode(vstr)

--- a/TTS/swapper.py
+++ b/TTS/swapper.py
@ -3,9 +3,10 @@ from os import getenv
 from dotenv import load_dotenv

 from TTS.GTTS import GTTS
+from TTS.POLLY import POLLY
 from TTS.TikTok import TikTok

-CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS}
+CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS, 'polly': POLLY}


 class TTS:
--- a/video_creation/background.py
+++ b/video_creation/background.py
@ -14,7 +14,7 @@ def get_start_and_end_times(video_length, length_of_clip):


 def download_background():
-    """Downloads the backgrounds/s video from youtube."""
+    """Downloads the backgrounds/s video from YouTube."""
    Path("./assets/backgrounds/").mkdir(parents=True, exist_ok=True)
    background_options = [  # uri , filename , credit
        ("https://www.youtube.com/watch?v=n_Dv4JMiwK8", "parkour.mp4", "bbswitzer"),
@ -25,7 +25,7 @@ def download_background():
        # ),
    ]
    # note: make sure the file name doesn't include an - in it
-    if not len(listdir("./assets/backgrounds")) <= len(
+    if not len(listdir("./assets/backgrounds")) >= len(
        background_options
    ):  # if there are any background videos not installed
        print_step(