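Fix TTS random_voice handling, Python version check, dependency pins, text image rendering, background download format, and screenshot sizing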

4 months ago · c0a0f9c5d2
parent 64bf647de9
commit c0a0f9c5d2
7 changed files with 74 additions and 23 deletions
--- a/TTS/engine_wrapper.py
+++ b/TTS/engine_wrapper.py
@ -144,11 +144,20 @@ class TTSEngine:
            print("OSError")

    def call_tts(self, filename: str, text: str):
-        self.tts_module.run(
-            text,
-            filepath=f"{self.path}/{filename}.mp3",
-            random_voice=settings.config["settings"]["tts"]["random_voice"],
-        )
+        # Check if the TTS module supports random_voice parameter
+        import inspect
+        run_signature = inspect.signature(self.tts_module.run)
+        if 'random_voice' in run_signature.parameters:
+            self.tts_module.run(
+                text,
+                filepath=f"{self.path}/{filename}.mp3",
+                random_voice=settings.config["settings"]["tts"]["random_voice"],
+            )
+        else:
+            self.tts_module.run(
+                text,
+                filepath=f"{self.path}/{filename}.mp3",
+            )
        # try:
        #     self.length += MP3(f"{self.path}/{filename}.mp3").info.length
        # except (MutagenError, HeaderNotFoundError):
--- a/main.py
+++ b/main.py
@ -79,9 +79,9 @@ def shutdown() -> NoReturn:


 if __name__ == "__main__":
-    if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11]:
+    if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11, 12, 13]:
        print(
-            "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10. Please install Python 3.10 and try again."
+            "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10+. Please install Python 3.10+ and try again."
        )
        sys.exit()
    ffmpeg_install()
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@ boto3==1.34.127
 botocore==1.34.127
 gTTS==2.5.1
 moviepy==1.0.3
-playwright==1.44.0
+playwright>=1.45.0
 praw==7.7.1
 prawcore~=2.3.0
 requests==2.32.3
@ -10,15 +10,11 @@ rich==13.7.1
 toml==0.10.2
 translators==5.9.2
 pyttsx3==2.90
-Pillow==10.3.0
+Pillow>=10.4.0
 tomlkit==0.12.5
 Flask==3.0.3
 clean-text==0.6.0
 unidecode==1.3.8
-spacy==3.7.5
-torch==2.3.1
-transformers==4.41.2
 ffmpeg-python==0.2.0
 elevenlabs==1.3.0
 yt-dlp==2024.5.27
-numpy==1.26.4
--- a/utils/imagenarator.py
+++ b/utils/imagenarator.py
@ -7,6 +7,29 @@ from rich.progress import track

 from TTS.engine_wrapper import process_text
 from utils.fonts import getheight, getsize
+from utils import settings
+
+
+def calculate_text_dimensions(text, font, padding, wrap=50):
+    """
+    Return the (width, height) needed to draw `text` wrapped at `wrap` characters, with `padding` between lines and a 10px margin on every side
+    """
+    lines = textwrap.wrap(text, width=wrap)
+    max_line_width = 0
+    total_height = 0
+    
+    for line in lines:
+        line_width, line_height = getsize(font, line)
+        max_line_width = max(max_line_width, line_width)
+        total_height += line_height
+    
+    # Add padding between lines
+    if len(lines) > 1:
+        total_height += (len(lines) - 1) * padding
+    
+    # Add minimal padding around the text
+    padding_around = 10
+    return max_line_width + (padding_around * 2), total_height + (padding_around * 2)


 def draw_multiple_line_text(
@ -60,16 +83,33 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
    texts = reddit_obj["thread_post"]
    id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])

+    # Use the actual video resolution from config instead of fixed landscape size
+    W = int(settings.config["settings"]["resolution_w"])
+    H = int(settings.config["settings"]["resolution_h"])
+    size = (W, H)
+    
+    # Adjust font size based on video resolution for better readability
+    # For 9:16 portrait videos, use smaller font size to fit better in the compact background
    if transparent:
-        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 100)
+        font_size = min(50, max(25, H // 50))  # Smaller font size for compact background
+        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_size)
    else:
-        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Regular.ttf"), 100)
-    size = (1920, 1080)
+        font_size = min(50, max(25, H // 50))  # Smaller font size for compact background
+        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Regular.ttf"), font_size)

    image = Image.new("RGBA", size, theme)

    for idx, text in track(enumerate(texts), "Rendering Image"):
-        image = Image.new("RGBA", size, theme)
        text = process_text(text, False)
-        draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent)
+        # Adjust text wrapping based on video width for better fit
+        wrap_width = max(20, min(35, W // 60))  # More balanced wrap width to fill the overlay better
+        
+        # Calculate the dimensions needed for this text
+        text_width, text_height = calculate_text_dimensions(text, font, padding=2, wrap=wrap_width)
+        
+        # Create an image that's only as big as the text content
+        image = Image.new("RGBA", (text_width, text_height), theme)
+        
+        # Use smaller padding to make text lines closer together and fill more vertical space
+        draw_multiple_line_text(image, text, font, txtclr, padding=2, wrap=wrap_width, transparent=transparent)
        image.save(f"assets/temp/{id}/png/img{idx}.png")
--- a/video_creation/background.py
+++ b/video_creation/background.py
@ -86,9 +86,10 @@ def download_background_video(background_config: Tuple[str, str, str, Any]):
    print_substep("Downloading the backgrounds videos... please be patient 🙏 ")
    print_substep(f"Downloading {filename} from {uri}")
    ydl_opts = {
-        "format": "bestvideo[height<=1080][ext=mp4]",
+        "format": "best[height<=1080]/best",
        "outtmpl": f"assets/backgrounds/video/{credit}-{filename}",
        "retries": 10,
+        "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
--- a/video_creation/final_video.py
+++ b/video_creation/final_video.py
@ -254,7 +254,11 @@ def make_final_video(

    console.log(f"[bold green] Video Will Be: {length} Seconds Long")

-    screenshot_width = int((W * 45) // 100)
+    # For 9:16 portrait videos, use an even smaller width to completely prevent clipping
+    # For example, with W=1080 and H=1920 the width has to be kept extremely conservative
+    screenshot_width = int((W * 15) // 100)  # Use only 15% of video width (162px)
+    # Ensure minimum and maximum bounds for portrait videos with extra padding
+    screenshot_width = max(150, min(screenshot_width, W - 300))  # Min 150px, Max W-300px for extra generous padding
    audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3")
    final_audio = merge_background_audio(audio, reddit_id)

--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -79,12 +79,13 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
        # Device scale factor (or dsf for short) allows us to increase the resolution of the screenshots
        # When the dsf is 1, the width of the screenshot is 600 pixels
        # so we need a dsf such that the width of the screenshot is greater than the final resolution of the video
-        dsf = (W // 600) + 1
+        # NOTE: the dsf is now capped at 2 for compatibility, so for wide videos the screenshot may end up narrower than the video
+        dsf = max(1, min(2, (W // 800) + 1))  # Cap dsf between 1 and 2 for better compatibility

        context = browser.new_context(
            locale=lang or "en-us",
            color_scheme="dark",
-            viewport=ViewportSize(width=W, height=H),
+            viewport=ViewportSize(width=min(W, 1200), height=min(H, 1600)),  # Cap viewport size
            device_scale_factor=dsf,
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36",
        )
@ -131,7 +132,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
            page.reload()
        # Get the thread screenshot
        page.goto(reddit_object["thread_url"], timeout=0)
-        page.set_viewport_size(ViewportSize(width=W, height=H))
+        page.set_viewport_size(ViewportSize(width=min(W, 1200), height=min(H, 1600)))
        page.wait_for_load_state()
        page.wait_for_timeout(5000)