feat/fix: Fixed ElevenLabs API support by upgrading to the v2 API, and added downloading of the user's voice list when an API key is entered in the interactive CLI.

3 months ago · e4be20344e
parent 37a184bdaf
commit e4be20344e
5 changed files with 196 additions and 24 deletions
--- a/.gitignore
+++ b/.gitignore
@ -246,3 +246,6 @@ video_creation/data/envvars.txt

 config.toml
 *.exe
+
+# /feat/elevenlabs_v2_support testing script
+test_elevenlabs.py
--- a/TTS/elevenlabs.py
+++ b/TTS/elevenlabs.py
@ -1,38 +1,115 @@
 import random

-from elevenlabs import save
+from elevenlabs import Voice
 from elevenlabs.client import ElevenLabs

 from utils import settings


-class elevenlabs:
+class ElevenLabsGenerate:
    def __init__(self):
        self.max_chars = 2500
        self.client: ElevenLabs = None
+        self.available_voices: list[Voice] = []  # To store fetched Voice objects
+        self.voice_name_to_id_map: dict[str, str] = (
+            {}
+        )  # To store name -> id mapping for quick lookup
+
+    @staticmethod
+    def get_available_voices(api_key: str) -> dict[str, str]:
+        """
+        Fetches available voice names and their IDs from ElevenLabs using a given API key.
+        Returns a dictionary mapping voice names (lowercase) to voice IDs, or an empty dict on failure.
+        """
+        if not api_key:
+            return {}
+        try:
+            client = ElevenLabs(api_key=api_key)
+            print("\n[1/4] Fetching available ElevenLabs voices...")
+            # Fetch the account's voices with client.voices.get_all().
+            # The response object exposes them through its 'voices' attribute.
+            response = client.voices.get_all()  # no page size is passed here; pagination, if any, is not handled
+            if not response.voices:
+                raise RuntimeError(
+                    "No voices found for your ElevenLabs account. Please check your API key."
+                )
+            voice_mapping = {
+                voice.name.lower(): voice.voice_id for voice in response.voices
+            }
+            print(f"✅ Success! Found {len(list(response.voices))} voices for your account:")
+            return voice_mapping
+        except Exception as e:
+            print(f"❌ Failed to fetch ElevenLabs voices: {e}")
+            return {}

    def run(self, text, filepath, random_voice: bool = False):
        if self.client is None:
            self.initialize()
+
+        voice_id_to_use = None
        if random_voice:
-            voice = self.randomvoice()
+            voice_id_to_use = self.randomvoice()
        else:
-            voice = str(settings.config["settings"]["tts"]["elevenlabs_voice_name"]).capitalize()
+            configured_voice_name = str(
+                settings.config["settings"]["tts"]["elevenlabs_voice_name"]
+            ).strip()
+            if not configured_voice_name:  # If name is blank, use random
+                voice_id_to_use = self.randomvoice()
+            else:
+                # Use the pre-built map for efficient lookup
+                voice_id_to_use = self.voice_name_to_id_map.get(
+                    configured_voice_name.lower()
+                )
+
+                if voice_id_to_use is None:
+                    # Provide a helpful error message if the configured voice is not found
+                    available_voice_names = list(
+                        self.voice_name_to_id_map.keys()
+                    )  # Get names from the map
+                    raise ValueError(
+                        f"Configured ElevenLabs voice '{configured_voice_name}' not found. "
+                        f"Available voices for your account are: {', '.join(available_voice_names)}. "
+                        "Please update 'elevenlabs_voice_name' in your config.toml or GUI."
+                    )

-        audio = self.client.generate(text=text, voice=voice, model="eleven_multilingual_v1")
-        save(audio=audio, filename=filepath)
+        audio_stream = self.client.text_to_speech.convert(
+            text=text, voice_id=voice_id_to_use, model_id="eleven_multilingual_v2"
+        )
+        with open(filepath, "wb") as f:
+            for chunk in audio_stream:
+                f.write(chunk)

    def initialize(self):
-        if settings.config["settings"]["tts"]["elevenlabs_api_key"]:
-            api_key = settings.config["settings"]["tts"]["elevenlabs_api_key"]
-        else:
+        api_key = settings.config["settings"]["tts"].get("elevenlabs_api_key")
+        if not api_key:
            raise ValueError(
                "You didn't set an Elevenlabs API key! Please set the config variable ELEVENLABS_API_KEY to a valid API key."
            )

        self.client = ElevenLabs(api_key=api_key)
+        # Fetch and store available voices during initialization
+        try:
+            self.available_voices = (
+                self.client.voices.get_all().voices
+            )  # Store Voice objects
+            self.voice_name_to_id_map = {
+                voice.name.lower(): voice.voice_id for voice in self.available_voices
+            }  # Build the map
+            if not self.available_voices:
+                raise RuntimeError(
+                    "No voices found for your ElevenLabs account. Please check your API key and account status."
+                )
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to fetch ElevenLabs voices: {e}. Please check your API key and internet connection."
+            ) from e

    def randomvoice(self):
        if self.client is None:
            self.initialize()
-        return random.choice(self.client.voices.get_all().voices).name
+        if not self.available_voices:  # Use the list of Voice objects
+            raise RuntimeError(
+                "No voices available from ElevenLabs account to choose from."
+            )
+        # Return the voice_id of a randomly selected voice object
+        return random.choice(self.available_voices).voice_id
--- a/requirements.txt
+++ b/requirements.txt
@ -17,5 +17,5 @@ spacy==3.8.7
 torch==2.7.0
 transformers==4.52.4
 ffmpeg-python==0.2.0
-elevenlabs==1.57.0
+elevenlabs==2.32.4
 yt-dlp==2025.5.22
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@ -46,8 +46,8 @@ background_thumbnail_font_color = { optional = true, default = "255,255,255", ex
 [settings.tts]
 voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
 random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" }
-elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] }
 elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
+elevenlabs_voice_name = { optional = true, default = "", example = "Bella", explanation = "The voice used for ElevenLabs. Will be configured after entering your API key. Leave blank to use a random voice." }
 aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
 streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
 tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" }
--- a/utils/settings.py
+++ b/utils/settings.py
@ -5,11 +5,16 @@ from typing import Dict, Tuple
 import toml
 from rich.console import Console

+import elevenlabs
 from utils.console import handle_input

+
 console = Console()
 config = dict  # autocomplete

+# A mapping of type names to their actual constructors for safe type casting.
+TYPE_CONSTRUCTORS = {"str": str, "int": int, "bool": bool, "float": float}
+

 def crawl(obj: dict, func=lambda x, y: print(x, y, end="\n"), path=None):
    if path is None:  # path Default argument value is mutable
@ -25,18 +30,78 @@ def check(value, checks, name):
    def get_check_value(key, default_result):
        return checks[key] if key in checks else default_result

+    # Dynamically fetch ElevenLabs voices if the API key is present
+    if name == "elevenlabs_voice_name":
+        # This relies on elevenlabs_api_key being processed first in the .toml file
+        api_key = config.get("settings", {}).get("tts", {}).get("elevenlabs_api_key")
+        if api_key:
+            console.print(
+                "\n[blue]Attempting to fetch your ElevenLabs voices...[/blue]"
+            )
+            try:
+                # This logic is ported from TTS/elevenlabs.py to avoid import issues
+                client = elevenlabs.ElevenLabs(api_key=api_key)
+                response = client.voices.get_all()
+                if not response.voices:
+                    console.print(
+                        "[yellow]No voices found for your ElevenLabs account. Check your API key.[/yellow]"
+                    )
+                else:
+                    available_voice_names = [
+                        voice.name.lower() for voice in response.voices
+                    ]
+                    console.print(
+                        f"✅ [green]Success! Found {len(list(response.voices))} voices for your account.[/green]"
+                    )
+                    checks["options"] = available_voice_names
+                    checks["explanation"] = (
+                        "Select a voice from your ElevenLabs account. Leave blank for random."
+                    )
+            except Exception as e:
+                console.print(f"❌ [red]Failed to fetch ElevenLabs voices: {e}[/red]")
+                console.print(
+                    "[yellow]You can enter a voice name manually or leave blank for random.[/yellow]"
+                )
+            # With an API key present, treat this setting as optional (blank means random voice)
+            checks["optional"] = True
+
    incorrect = False
    if value == {}:
        incorrect = True
    if not incorrect and "type" in checks:
-        try:
-            value = eval(checks["type"])(value)  # fixme remove eval
-        except:
+        type_constructor = TYPE_CONSTRUCTORS.get(checks["type"])
+        if type_constructor:
+            try:
+                # Special handling for bool, as bool('False') is True.
+                if type_constructor is bool and isinstance(value, str):
+                    value = value.lower() not in ("false", "0", "no", "")
+                else:
+                    value = type_constructor(value)
+            except (ValueError, TypeError):
+                incorrect = True
+        else:
+            # The type specified in the template is not a known/safe type.
+            console.print(
+                f"[red]Error: Unknown type '{checks['type']}' in config template for '{name}'.[/red]"
+            )
            incorrect = True

+    # Prepare value for checks; especially for case-insensitive options like voice names
+    check_value = value
+    if name == "elevenlabs_voice_name":
+        check_value = str(value).lower().strip()
+
+    # A blank value is acceptable for optional fields
+    is_optional_and_blank = (
+        "optional" in checks and checks["optional"] and str(value).strip() == ""
+    )
+
    if (
-        not incorrect and "options" in checks and value not in checks["options"]
-    ):  # FAILSTATE Value is not one of the options
+        not incorrect
+        and not is_optional_and_blank
+        and "options" in checks
+        and check_value not in checks["options"]
+    ):
        incorrect = True
    if (
        not incorrect
@ -53,7 +118,11 @@ def check(value, checks, name):
        and not hasattr(value, "__iter__")
        and (
            ("nmin" in checks and checks["nmin"] is not None and value < checks["nmin"])
-            or ("nmax" in checks and checks["nmax"] is not None and value > checks["nmax"])
+            or (
+                "nmax" in checks
+                and checks["nmax"] is not None
+                and value > checks["nmax"]
+            )
        )
    ):
        incorrect = True
@ -61,24 +130,45 @@ def check(value, checks, name):
        not incorrect
        and hasattr(value, "__iter__")
        and (
-            ("nmin" in checks and checks["nmin"] is not None and len(value) < checks["nmin"])
-            or ("nmax" in checks and checks["nmax"] is not None and len(value) > checks["nmax"])
+            (
+                "nmin" in checks
+                and checks["nmin"] is not None
+                and len(value) < checks["nmin"]
+            )
+            or (
+                "nmax" in checks
+                and checks["nmax"] is not None
+                and len(value) > checks["nmax"]
+            )
        )
    ):
        incorrect = True

    if incorrect:
+        # Safely get the type constructor for the input prompt.
+        # The "False" default is a special case for handle_input, which we preserve.
+        type_str = get_check_value("type", "False")
+        check_type_arg = (
+            TYPE_CONSTRUCTORS.get(type_str) if type_str != "False" else False
+        )
+
        value = handle_input(
            message=(
-                (("[blue]Example: " + str(checks["example"]) + "\n") if "example" in checks else "")
+                (
+                    ("[blue]Example: " + str(checks["example"]) + "\n")
+                    if "example" in checks
+                    else ""
+                )
                + "[red]"
-                + ("Non-optional ", "Optional ")["optional" in checks and checks["optional"] is True]
+                + ("Non-optional ", "Optional ")[
+                    "optional" in checks and checks["optional"] is True
+                ]
            )
            + "[#C0CAF5 bold]"
            + str(name)
            + "[#F7768E bold]=",
            extra_info=get_check_value("explanation", ""),
-            check_type=eval(get_check_value("type", "False")),  # fixme remove eval
+            check_type=check_type_arg,
            default=get_check_value("default", NotImplemented),
            match=get_check_value("regex", ""),
            err_message=get_check_value("input_error", "Incorrect input"),
@ -113,7 +203,9 @@ def check_toml(template_file, config_file) -> Tuple[bool, Dict]:
    try:
        template = toml.load(template_file)
    except Exception as error:
-        console.print(f"[red bold]Encountered error when trying to to load {template_file}: {error}")
+        console.print(
+            f"[red bold]Encountered error when trying to to load {template_file}: {error}"
+        )
        return False
    try:
        config = toml.load(config_file)