clean-up, add comments, add azure-cognitiveservices-speech to requirements.txt

11 months ago · 34ca6c79ca
parent ab4f5bd46b
commit 34ca6c79ca
2 changed files with 13 additions and 6 deletions
--- a/TTS/azuretts.py
+++ b/TTS/azuretts.py
@ -1,14 +1,11 @@
-import os
-import io
 import random
-from pydub import AudioSegment
-from pydub.playback import play
 import azure.cognitiveservices.speech as speechsdk
 from utils import settings


 class AzureTTS:
    def __init__(self):
+        # Initialize the AzureTTS class with necessary configurations
        self.voices = []
        self.api_key = settings.config["settings"]["tts"]["azure_api_key"]
        self.region = settings.config["settings"]["tts"]["azure_region"]
@ -16,31 +13,36 @@ class AzureTTS:
        self.rate = settings.config["settings"]["tts"]["azure_voice_speed_boost"]

    def run(self, text: str, filepath: str, random_voice=False):
+        # Validate API key and region
        if not self.api_key or not self.region:
            raise ValueError("Azure API key and region must be set in settings.")

+        # Validate the rate value
        if not isinstance(self.rate, int) or not (0 <= self.rate <= 100):
            raise ValueError(
                "azure_voice_speed_boost must be an integer between 0 and 100."
            )

+        # Configure speech synthesis with Azure
        speech_config = speechsdk.SpeechConfig(
            subscription=self.api_key, region=self.region
        )
        audio_config = speechsdk.audio.AudioOutputConfig(filename=filepath)

+        # Select voice: random or default
        if random_voice:
            voice_name = self.random_voice()
        else:
            voice_name = self.default_voice

+        # Set the voice name in the speech configuration
        speech_config.speech_synthesis_voice_name = voice_name
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config
        )
        rate_with_percent = f"{self.rate}%"

-        # Construct SSML with the specified rate
+        # Construct SSML (Speech Synthesis Markup Language) with the specified rate
        ssml_text = f"""
        <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">
            <voice name="{voice_name}">
@ -48,19 +50,23 @@ class AzureTTS:
            </voice>
        </speak>
        """
+        # Perform speech synthesis
        result = speech_synthesizer.speak_ssml_async(ssml_text).get()

+        # Check the result of the synthesis
        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print(f"Speech synthesized for text [{text}] and saved to [{filepath}]")
        else:
            print(f"Speech synthesis failed: {result.reason}")

    def random_voice(self):
+        # Return a random voice from the available voices
        if not self.voices:
            self.voices = self.fetch_available_voices()
        return random.choice(self.voices)

    def fetch_available_voices(self):
+        # Return a list of available voices
        return [
            "en-US-AndrewMultilingualNeural",
            "en-US-AvaMultilingualNeural",
@ -74,4 +80,4 @@ class AzureTTS:
            "zh-CN-XiaochenMultilingualNeural",
            "zh-CN-XiaoyuMultilingualNeural",
            "zh-CN-YunyiMultilingualNeural",
-        ]
+        ]
--- a/requirements.txt
+++ b/requirements.txt
@ -22,3 +22,4 @@ ffmpeg-python==0.2.0
 elevenlabs==1.8.1
 yt-dlp==2024.10.7
 numpy==1.26.4
+azure-cognitiveservices-speech==1.41.1