Add CAMB AI as a TTS provider

Adds CambAITTS class with streaming TTS support, voice cloning, and 13 configurable language options. Integrates into the existing provider registry with full config template and unit tests.
1 month ago · 0781f643c3
parent 569f25098a
commit 0781f643c3
6 changed files with 203 additions and 1 deletions
--- a/TTS/cambai.py
+++ b/TTS/cambai.py
@ -0,0 +1,50 @@
+import random
+
+from camb.client import CambAI, save_stream_to_file
+from camb.types import StreamTtsOutputConfiguration
+
+from utils import settings
+
+
+class CambAITTS:
+    """Text-to-speech provider that streams MP3 audio from the CAMB AI API.
+
+    The API client is created lazily on first use, so constructing this class
+    does not require an API key to be configured.
+    """
+
+    def __init__(self):
+        # Maximum characters per request.
+        # NOTE(review): presumably read by the TTS engine wrapper to split
+        # long text -- confirm against TTS.engine_wrapper.
+        self.max_chars = 5000
+        # Stays None until initialize() builds the client.
+        self.client: "CambAI | None" = None
+
+    def run(self, text, filepath, random_voice: bool = False):
+        """Synthesize ``text`` and save the resulting MP3 stream to ``filepath``.
+
+        Args:
+            text: The text to convert to speech.
+            filepath: Destination path handed to ``save_stream_to_file``.
+            random_voice: When true, use a voice picked at random from the
+                voice list instead of the configured ``cambai_voice_id``.
+
+        Raises:
+            ValueError: If the API key is not configured, the configured voice
+                ID is missing or not an integer, or no voice is available for
+                random selection.
+        """
+        if self.client is None:
+            self.initialize()
+
+        if random_voice:
+            voice_id = self.randomvoice()
+        else:
+            voice_id = self._configured_voice_id()
+
+        tts_config = settings.config["settings"]["tts"]
+        language = str(tts_config.get("cambai_language", "en-us"))
+        speech_model = str(tts_config.get("cambai_speech_model", "mars-flash"))
+
+        stream = self.client.text_to_speech.tts(
+            text=text,
+            language=language,
+            voice_id=voice_id,
+            speech_model=speech_model,
+            output_configuration=StreamTtsOutputConfiguration(format="mp3"),
+        )
+        save_stream_to_file(stream, filepath)
+
+    def initialize(self):
+        """Create the CAMB AI client from the configured ``cambai_api_key``.
+
+        Raises:
+            ValueError: If ``cambai_api_key`` is missing or empty.
+        """
+        api_key = settings.config["settings"]["tts"].get("cambai_api_key", "")
+        if not api_key:
+            raise ValueError(
+                "You didn't set a CAMB AI API key! Please set the config variable cambai_api_key to a valid API key."
+            )
+        self.client = CambAI(api_key=api_key)
+
+    def randomvoice(self):
+        """Return the ID of a voice chosen at random from the client's voice list.
+
+        Raises:
+            ValueError: If the API key is not configured or the voice list is
+                empty.
+        """
+        if self.client is None:
+            self.initialize()
+        voices = self.client.voice_cloning.list_voices()
+        if not voices:
+            # random.choice() on an empty sequence raises a bare IndexError;
+            # report the actual problem instead.
+            raise ValueError(
+                "CAMB AI returned no voices to choose from. Disable random_voice or set cambai_voice_id."
+            )
+        voice = random.choice(voices)
+        try:
+            return voice["id"]
+        except TypeError:
+            # NOTE(review): fallback in case the SDK returns model objects
+            # rather than mappings -- confirm against the camb SDK types.
+            return voice.id
+
+    @staticmethod
+    def _configured_voice_id() -> int:
+        """Read ``cambai_voice_id`` from the config and return it as an int."""
+        raw_voice_id = settings.config["settings"]["tts"].get("cambai_voice_id")
+        try:
+            return int(raw_voice_id)
+        except (TypeError, ValueError) as err:
+            raise ValueError(
+                f"cambai_voice_id must be an integer CAMB AI voice ID, got {raw_voice_id!r}."
+            ) from err
--- a/requirements.txt
+++ b/requirements.txt
@ -18,4 +18,5 @@ torch==2.7.0
 transformers==4.52.4
 ffmpeg-python==0.2.0
 elevenlabs==1.57.0
+camb-sdk
 yt-dlp==2025.10.22
--- a/tests/__init__.py
+++ b/tests/__init__.py
--- a/tests/test_cambai.py
+++ b/tests/test_cambai.py
@ -0,0 +1,145 @@
+import os
+import tempfile
+import pytest
+from unittest.mock import patch, Mock, MagicMock
+
+
+class TestCambAITTSUnit:
+    """Unit tests for CambAITTS — all mocked, no API key needed."""
+
+    def test_max_chars_is_5000(self):
+        """A fresh instance exposes a 5000-character limit."""
+        with patch("TTS.cambai.settings"):
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            assert tts.max_chars == 5000
+
+    def test_initialize_raises_without_api_key(self):
+        """An empty API key makes initialize() raise ValueError."""
+        with patch("TTS.cambai.settings") as mock_settings:
+            mock_settings.config = {"settings": {"tts": {"cambai_api_key": ""}}}
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            with pytest.raises(ValueError, match="CAMB AI API key"):
+                tts.initialize()
+
+    def test_initialize_creates_client(self):
+        """initialize() builds the client with the configured API key."""
+        with patch("TTS.cambai.settings") as mock_settings, \
+             patch("TTS.cambai.CambAI") as mock_camb:
+            mock_settings.config = {"settings": {"tts": {"cambai_api_key": "test-key"}}}
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            tts.initialize()
+            mock_camb.assert_called_once_with(api_key="test-key")
+            assert tts.client is not None
+
+    def test_run_generates_mp3(self):
+        """run() forwards text and config values to the API and saves the stream."""
+        with patch("TTS.cambai.settings") as mock_settings, \
+             patch("TTS.cambai.CambAI") as mock_camb_cls, \
+             patch("TTS.cambai.save_stream_to_file") as mock_save, \
+             patch("TTS.cambai.StreamTtsOutputConfiguration"):
+            mock_settings.config = {
+                "settings": {"tts": {
+                    "cambai_api_key": "test-key",
+                    "cambai_voice_id": "147320",
+                    "cambai_language": "en-us",
+                    "cambai_speech_model": "mars-flash",
+                }}
+            }
+            mock_client = Mock()
+            mock_client.text_to_speech.tts.return_value = iter([b"fake-audio"])
+            mock_camb_cls.return_value = mock_client
+
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            tts.run("Hello world", "/tmp/test.mp3", random_voice=False)
+
+            mock_client.text_to_speech.tts.assert_called_once()
+            tts_call = mock_client.text_to_speech.tts.call_args
+            assert tts_call.kwargs["text"] == "Hello world"
+            # The string voice ID from the config must arrive as an int.
+            assert tts_call.kwargs["voice_id"] == 147320
+            assert tts_call.kwargs["language"] == "en-us"
+            assert tts_call.kwargs["speech_model"] == "mars-flash"
+            mock_save.assert_called_once()
+            assert mock_save.call_args[0][1] == "/tmp/test.mp3"
+
+    def test_run_reads_config_voice_id(self):
+        """Non-default voice, language and model settings reach the API call."""
+        with patch("TTS.cambai.settings") as mock_settings, \
+             patch("TTS.cambai.CambAI") as mock_camb_cls, \
+             patch("TTS.cambai.save_stream_to_file"), \
+             patch("TTS.cambai.StreamTtsOutputConfiguration"):
+            mock_settings.config = {
+                "settings": {"tts": {
+                    "cambai_api_key": "test-key",
+                    "cambai_voice_id": "99999",
+                    "cambai_language": "es-es",
+                    "cambai_speech_model": "mars-pro",
+                }}
+            }
+
+            mock_client = Mock()
+            mock_client.text_to_speech.tts.return_value = iter([b"audio"])
+            mock_camb_cls.return_value = mock_client
+
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            tts.run("test", "/tmp/out.mp3")
+
+            call_kwargs = mock_client.text_to_speech.tts.call_args.kwargs
+            assert call_kwargs["voice_id"] == 99999
+            assert call_kwargs["language"] == "es-es"
+            assert call_kwargs["speech_model"] == "mars-pro"
+
+    def test_random_voice_picks_from_list(self):
+        """With random_voice=True the randomly chosen voice ID is the one sent."""
+        with patch("TTS.cambai.settings") as mock_settings, \
+             patch("TTS.cambai.CambAI") as mock_camb_cls, \
+             patch("TTS.cambai.save_stream_to_file"), \
+             patch("TTS.cambai.StreamTtsOutputConfiguration"), \
+             patch("TTS.cambai.random") as mock_random:
+            mock_settings.config = {"settings": {"tts": {"cambai_api_key": "test-key"}}}
+            available_voices = [
+                {"id": 111, "voice_name": "Voice A"},
+                {"id": 222, "voice_name": "Voice B"},
+            ]
+            mock_client = Mock()
+            mock_client.voice_cloning.list_voices.return_value = available_voices
+            mock_client.text_to_speech.tts.return_value = iter([b"audio"])
+            mock_camb_cls.return_value = mock_client
+            mock_random.choice.return_value = {"id": 222, "voice_name": "Voice B"}
+
+            from TTS.cambai import CambAITTS
+            tts = CambAITTS()
+            tts.run("test", "/tmp/out.mp3", random_voice=True)
+
+            mock_client.voice_cloning.list_voices.assert_called_once()
+            mock_random.choice.assert_called_once_with(available_voices)
+            # Without this check the test passes even if the random pick is
+            # never forwarded to the TTS request.
+            assert mock_client.text_to_speech.tts.call_args.kwargs["voice_id"] == 222
+
+
+@pytest.mark.integration
+class TestCambAITTSIntegration:
+    """Live API checks; every test is skipped unless CAMB_API_KEY is set."""
+
+    @pytest.fixture(autouse=True)
+    def skip_without_key(self):
+        """Skip the current test when CAMB_API_KEY is absent or empty."""
+        api_key = os.environ.get("CAMB_API_KEY")
+        if api_key:
+            return
+        pytest.skip("CAMB_API_KEY not set")
+
+    def test_real_api_generates_audio(self):
+        """Stream a short phrase from the real API and expect a non-empty file."""
+        from camb.client import CambAI, save_stream_to_file
+        from camb.types import StreamTtsOutputConfiguration
+
+        camb_client = CambAI(api_key=os.environ["CAMB_API_KEY"])
+        # Only the path is needed; the handle is closed before the SDK writes.
+        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_file:
+            audio_path = tmp_file.name
+
+        try:
+            request = {
+                "text": "Integration test for RedditVideoMakerBot.",
+                "language": "en-us",
+                "voice_id": 147320,
+                "speech_model": "mars-flash",
+                "output_configuration": StreamTtsOutputConfiguration(format="mp3"),
+            }
+            audio_stream = camb_client.text_to_speech.tts(**request)
+            save_stream_to_file(audio_stream, audio_path)
+            assert os.path.exists(audio_path)
+            assert os.path.getsize(audio_path) > 0
+        finally:
+            # delete=False leaves the file behind, so remove it explicitly.
+            if os.path.exists(audio_path):
+                os.remove(audio_path)
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@ -45,7 +45,7 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96,
 background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" }

 [settings.tts]
-voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
+voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI", "CambAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
 random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" }
 elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] }
 elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
@ -61,3 +61,7 @@ openai_api_url = { optional = true, default = "https://api.openai.com/v1/", exam
 openai_api_key = { optional = true, example = "sk-abc123def456...", explanation = "Your OpenAI API key for TTS generation" }
 openai_voice_name = { optional = false, default = "alloy", example = "alloy", explanation = "The voice used for OpenAI TTS generation", options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "af_heart"] }
 openai_model = { optional = false, default = "tts-1", example = "tts-1", explanation = "The model variant used for OpenAI TTS generation", options = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] }
+cambai_api_key = { optional = true, example = "your-camb-ai-api-key", explanation = "CAMB AI API key from studio.camb.ai" }
+cambai_voice_id = { optional = false, default = "147320", example = "147320", explanation = "The CAMB AI voice ID (integer) to use for TTS generation" }
+cambai_speech_model = { optional = false, default = "mars-flash", example = "mars-flash", explanation = "The CAMB AI speech model to use", options = ["mars-flash", "mars-pro", "mars-instruct"] }
+cambai_language = { optional = false, default = "en-us", example = "en-us", explanation = "The language code for CAMB AI TTS (BCP-47 format)", options = ["en-us", "es-es", "fr-fr", "de-de", "ja-jp", "hi-in", "pt-br", "zh-cn", "ko-kr", "it-it", "nl-nl", "ru-ru", "ar-sa"] }
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@ -3,6 +3,7 @@ from typing import Tuple
 from rich.console import Console

 from TTS.aws_polly import AWSPolly
+from TTS.cambai import CambAITTS
 from TTS.elevenlabs import elevenlabs
 from TTS.engine_wrapper import TTSEngine
 from TTS.GTTS import GTTS
@ -23,6 +24,7 @@ TTSProviders = {
    "pyttsx": pyttsx,
    "ElevenLabs": elevenlabs,
    "OpenAI": OpenAITTS,
+    "CambAI": CambAITTS,
 }