From 50815cbe74aef4861e6e8391f39a9e75a89ec583 Mon Sep 17 00:00:00 2001 From: anthony lloyd Date: Mon, 11 Jul 2022 17:12:12 +1000 Subject: [PATCH] removed profanity filter from this branch added fmpeg.exe to git ignore. this branch requires ffmpeg in the main directory --- .gitignore | 1 + utils/voice.py | 39 ++------------------------------------- 2 files changed, 3 insertions(+), 37 deletions(-) diff --git a/.gitignore b/.gitignore index 793db5d..604fee1 100644 --- a/.gitignore +++ b/.gitignore @@ -243,3 +243,4 @@ video_creation/data/videos.json video_creation/data/envvars.txt config.toml +ffmpeg.exe diff --git a/utils/voice.py b/utils/voice.py index 30d882c..d759545 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -1,3 +1,4 @@ + import re import sys from datetime import datetime @@ -67,10 +68,8 @@ def sanitize_text(text: str) -> str: What gets removed: - following characters`^_~@!&;#:-%“”‘"%*/{}[]()\|<>?=+` - any http or https links - Args: text (str): Text to be sanitized - Returns: str: Sanitized text """ @@ -78,45 +77,11 @@ def sanitize_text(text: str) -> str: # remove any urls from the text regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" - profanity = [ - ["a word", r"(?:^|\W)ass(?:$|\W)", "arse", "asdf", "asdf", "asdf"], - ["b word", "bastard", r"blow job", r"blowie", r"bitch", r"asdf"], - ["c word", "cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"], - ["d word", "dick", r"asdf", r"asdf", r"asdf", r"asdf"], - ["e word", "asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["f word", r"fuck", "faggot", "fag", "asdf", "asdf"], - ["g word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["h word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["i word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["j word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["k word", "knob", "kum", r"koon", r"asdf", r"asdf"], - ["l word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["m word", "minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"], - ["n word", "nigga", "nigger", r"asdf", r"asdf", r"asdf"], - ["o word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["p word", "pussy", "piss", "punani", "prick", "asdf"], - ["q word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["r word", "retard", "retards", r"asdf", r"asdf", r"asdf"], - ["s word", "slut", "shit", r"asdf", r"asdf", r"asdf"], - ["t word", "twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"], - ["u word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["v word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], - ["w word", "wanker", "asdf", "asdf", r"asdf", r"asdf"], - ] - result = re.sub(regex_urls, "url", text) # note: not removing apostrophes regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%—“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") - #print(result) - for x in range(0, len(profanity)): - for y in range(1, len(profanity[0])): - # print("row: " + str(x)) - # print("column: " + str(y)) - result = re.sub(profanity[x][y], profanity[x][0], result, flags=re.I) - # print(regex[x][y]) # remove extra whitespace - print(result) - return " ".join(result.split()) + return " ".join(result.split()) \ No newline at end of file