RedditVideoMakerBot/utils/voice.py

import re


def sanitize_text(text: str) -> str:
    r"""Sanitizes the text for tts.

    What gets removed or replaced:
     - URLs; the ``http://`` / ``https://`` scheme is optional in the
       pattern, so bare domain-like tokens (``example.com``) go too
     - the characters `^_~@!;#:-%“”‘"*/{}[]()\|<>=` (each becomes a space)
     - apostrophes (`'` or `’`) that touch whitespace; apostrophes inside
       a word (``don't``) are kept
     - `+` is replaced by the word "plus" and `&` by the word "and"
     - runs of whitespace are collapsed to one space, ends are trimmed

    Question marks are left in place.

    Args:
        text (str): Text to be sanitized

    Returns:
        str: Sanitized text
    """

    # remove any urls from the text
    regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
    result = re.sub(regex_urls, " ", text)

    # Symbols that should be spoken rather than dropped. They must be handled
    # in the same pass as the removal below: the pattern matches "+" and "&"
    # as well, so a str.replace() afterwards would never find them.
    # Padding keeps the word from fusing with its neighbours ("1+1").
    spoken = {"+": " plus ", "&": " and "}

    # note: not removing apostrophes that sit inside a word
    # ("|" inside the square brackets is a literal pipe, not alternation)
    regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
    result = re.sub(regex_expr, lambda match: spoken.get(match.group(0), " "), result)

    # remove extra whitespace
    return " ".join(result.split())
-												sanitize text.

closes #59

											
										
										
											3 years ago
+								import re
-												Added docs and typing in function declarations

											
										
										
											2 years ago
+								def sanitize_text(text: str) -> str:
-												Remove unused imports and fix pylint errors

'async_playwright' must stay due to anomalous error
											
										
										
											2 years ago
+								    r"""Sanitizes the text for tts.
-												Added docs and typing in function declarations

											
										
										
											2 years ago
+								        What gets removed:
 								     - following characters`^_~@!&;#:-%“”‘"%*/{}[]()\|<>?=+`
 								     - any http or https links
 								    Args:
 								        text (str): Text to be sanitized
 								    Returns:
 								        str: Sanitized text
-												reformatted using black

command
"black . --line-length 101"

											
										
										
											2 years ago
+								    """
-												sanitize text.

closes #59

											
										
										
											3 years ago
-												reformatted using black

command
"black . --line-length 101"

											
										
										
											2 years ago
+								    # remove any urls from the text
 								    regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
-												improved voice.py uri remover

											
										
										
											3 years ago
-												reformatted using black

command
"black . --line-length 101"

											
										
										
											2 years ago
+								    result = re.sub(regex_urls, " ", text)
-												updated voice.py with uri removal too

											
										
										
											3 years ago
-												reformatted using black

command
"black . --line-length 101"

											
										
										
											2 years ago
+								    # note: not removing apostrophes
 								    regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
 								    result = re.sub(regex_expr, " ", result)
-												Fix small bugs and path issues


											
										
										
											2 years ago
+								    result = result.replace("+", "plus").replace("&", "and")
-												reformatted using black

command
"black . --line-length 101"

											
										
										
											2 years ago
+								    # remove extra whitespace
 								    return " ".join(result.split())