Add AWS Polly for text to speech

3 years ago · b2d4a0befd
parent 1462b1d9b6
commit b2d4a0befd
2 changed files with 48 additions and 36 deletions
--- a/README.md
+++ b/README.md
@ -7,7 +7,6 @@ All done WITHOUT video editing or asset compiling. Just pure ✨programming magi
 Created by Lewis Menelaws & [TMRRW](https://tmrrwinc.ca)
 [<picture>
  <source media="(prefers-color-scheme: dark)" srcset="https://user-images.githubusercontent.com/6053155/170528535-e274dc0b-7972-4b27-af22-637f8c370133.png">
  <source media="(prefers-color-scheme: light)" srcset="https://user-images.githubusercontent.com/6053155/170528582-cb6671e7-5a2f-4bd4-a048-0e6cfa54f0f7.png">
  <img src="https://user-images.githubusercontent.com/6053155/170528582-cb6671e7-5a2f-4bd4-a048-0e6cfa54f0f7.png" width="350">
@ -32,20 +31,15 @@ These videos on TikTok, YouTube and Instagram get MILLIONS of views across all p
 ## Installation 👩‍💻
 1. Clone this repository
-
+2. Rename `.env.template` to `.env` and replace all values with the appropriate fields. To get Reddit keys (**required**), visit [the Reddit Apps page.](https://www.reddit.com/prefs/apps) TL;DR set up an app that is a "script". Copy your keys into the `.env` file, along with whether your account uses two-factor authentication.
-2. Run `pip3 install -r requirements.txt`
+3. Store your AWS credentials in `~/.aws/credentials` under a profile named `polly` that has full AWS Polly permissions.
-3. Run `playwright install` and `playwright install-deps`.
+4. Run `pip3 install -r requirements.txt`
-4. 
+5. Run `playwright install` and `playwright install-deps`.
-	4a **Automatic Install**: Run `python3 main.py` and type 'yes' to activate the setup assistant.
+6. Run `python3 main.py`
 	4b **Manual Install**: Rename `.env.template` to `.env` and replace all values with the appropriate fields. To get Reddit keys (**required**), visit [the Reddit Apps page.](https://www.reddit.com/prefs/apps) TL;DR set up an app that is a "script". Copy your keys into the `.env` file, along with whether your account uses two-factor authentication.
 5. Run `python3 main.py` (unless you chose automatic install, then the installer will automatically run main.py)
 7. Enjoy 😎
 If you want to see a more detailed guide, please refer to the official [documentation](https://luka-hietala.gitbook.io/documentation-for-the-reddit-bot/).
-\*The Documentation is still being developed and worked on, please be patient as we change / add new knowledge!
+*The Documentation is still being developed and worked on, please be patient as we change / add new knowledge!
 ## Contributing & Ways to improve 📈
@ -53,10 +47,8 @@ In its current state, this bot does exactly what it needs to do. However, lots o
 I have tried to simplify the code so anyone can read it and start contributing at any skill level. Don't be shy :) contribute!
- [x] Allowing users to choose a reddit thread instead of being randomized.
+- [ ] Allowing users to choose a reddit thread instead of being randomized.
 - [ ] Allowing users to choose a background that is picked instead of the Minecraft one.
 - [x] Allowing users to choose between any subreddit.
 - [ ] Allowing users to change voice.
 - [ ] Creating better documentation and adding a command line interface.
 Please read our [contributing guidelines](CONTRIBUTING.md) for more detailed information.
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@ -1,9 +1,13 @@
 from gtts import gTTS
 from pathlib import Path
 from mutagen.mp3 import MP3
 from utils.console import print_step, print_substep
 from rich.progress import track
-
+from boto3 import Session
 from botocore.exceptions import BotoCoreError, ClientError
 import os
 import sys
 import subprocess
 from tempfile import gettempdir
 def save_text_to_mp3(reddit_obj):
    """Saves Text to MP3 files.
@ -13,32 +17,48 @@ def save_text_to_mp3(reddit_obj):
    """
    print_step("Saving Text to MP3 files...")
    length = 0
    session = Session(profile_name="polly")
    polly = session.client("polly")
    # Create a folder for the mp3 files.
    Path("assets/mp3").mkdir(parents=True, exist_ok=True)
-    tts = gTTS(text=reddit_obj["thread_title"], lang="en", slow=False)
+    text_to_speech(polly, "title", reddit_obj["thread_title"])
    tts.save(f"assets/mp3/title.mp3")
    length += MP3(f"assets/mp3/title.mp3").info.length
    try:
        Path(f"assets/mp3/posttext.mp3").unlink()
    except OSError as e:
        pass
    if reddit_obj["thread_post"] != "":
        tts = gTTS(text=reddit_obj["thread_post"], lang="en", slow=False)
        tts.save(f"assets/mp3/posttext.mp3")
        length += MP3(f"assets/mp3/posttext.mp3").info.length
    for idx, comment in track(enumerate(reddit_obj["comments"]), "Saving..."):
        # ! Stop creating mp3 files if the length is greater than 50 seconds. This can be longer, but this is just a good starting point
        if length > 50:
            break
-        tts = gTTS(text=comment["comment_body"], lang="en", slow=False)
+
-        tts.save(f"assets/mp3/{idx}.mp3")
+        text_to_speech(polly, idx, comment["comment_body"])
        length += MP3(f"assets/mp3/{idx}.mp3").info.length
-    print_substep("Saved Text to MP3 files successfully.", style="bold green")
+    print_substep(f"Saved Text to MP3 files successfully.", style="bold green")
    # ! Return the index so we know how many screenshots of comments we need to make.
    return length, idx
 def text_to_speech(polly, idx, text):
     """Synthesize ``text`` to speech and save it as ``assets/mp3/{idx}.mp3``.

     The request asks for MP3 output using the "Joanna" voice on the
     "neural" engine. The ``assets/mp3`` directory is not created here and
     must already exist.

     Args:
         polly: Client object exposing ``synthesize_speech`` (the caller in
             this file passes ``session.client("polly")``).
         idx: Name used for the output file, e.g. ``"title"`` or a comment
             index.
         text: The text to convert to speech.

     Raises:
         SystemExit: With status ``-1`` if the request raises
             ``BotoCoreError``/``ClientError`` or the response has no
             ``"AudioStream"`` entry.
     """
     try:
         # Request speech synthesis
         response = polly.synthesize_speech(
             Text=text, OutputFormat="mp3", VoiceId="Joanna", Engine="neural"
         )
     except (BotoCoreError, ClientError) as error:
         # The service returned an error, exit gracefully
         print(error)
         sys.exit(-1)

     if "AudioStream" not in response:
         # The response didn't contain audio data, exit gracefully
         print("Could not stream audio")
         sys.exit(-1)

     # Access the audio stream from the response. Close both the stream and
     # the output file even if reading or writing fails part-way through.
     stream = response["AudioStream"]
     try:
         with open(f"assets/mp3/{idx}.mp3", "wb") as file:
             file.write(stream.read())
     finally:
         stream.close()