removed profanity filter from this branch

added ffmpeg.exe to .gitignore.
this branch requires ffmpeg in the main directory
pull/915/head
anthony lloyd 3 years ago
parent 6ce11fbdba
commit 50815cbe74

1
.gitignore vendored

@ -243,3 +243,4 @@ video_creation/data/videos.json
video_creation/data/envvars.txt
config.toml
ffmpeg.exe

@ -1,3 +1,4 @@
import re
import sys
from datetime import datetime
@ -67,10 +68,8 @@ def sanitize_text(text: str) -> str:
What gets removed:
- following characters`^_~@!&;#:-%“”‘"%*/{}[]()\|<>?=+`
- any http or https links
Args:
text (str): Text to be sanitized
Returns:
str: Sanitized text
"""
@ -78,45 +77,11 @@ def sanitize_text(text: str) -> str:
# remove any urls from the text
regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
profanity = [
["a word", r"(?:^|\W)ass(?:$|\W)", "arse", "asdf", "asdf", "asdf"],
["b word", "bastard", r"blow job", r"blowie", r"bitch", r"asdf"],
["c word", "cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"],
["d word", "dick", r"asdf", r"asdf", r"asdf", r"asdf"],
["e word", "asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
["f word", r"fuck", "faggot", "fag", "asdf", "asdf"],
["g word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["h word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["i word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["j word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["k word", "knob", "kum", r"koon", r"asdf", r"asdf"],
["l word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["m word", "minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"],
["n word", "nigga", "nigger", r"asdf", r"asdf", r"asdf"],
["o word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["p word", "pussy", "piss", "punani", "prick", "asdf"],
["q word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["r word", "retard", "retards", r"asdf", r"asdf", r"asdf"],
["s word", "slut", "shit", r"asdf", r"asdf", r"asdf"],
["t word", "twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"],
["u word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["v word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["w word", "wanker", "asdf", "asdf", r"asdf", r"asdf"],
]
result = re.sub(regex_urls, "url", text)
# note: not removing apostrophes
regex_expr = r"\s['|]|['|]\s|[\^_~@!&;#:\-%—“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
result = re.sub(regex_expr, " ", result)
result = result.replace("+", "plus").replace("&", "and")
#print(result)
for x in range(0, len(profanity)):
for y in range(1, len(profanity[0])):
# print("row: " + str(x))
# print("column: " + str(y))
result = re.sub(profanity[x][y], profanity[x][0], result, flags=re.I)
# print(regex[x][y])
# remove extra whitespace
print(result)
return " ".join(result.split())
return " ".join(result.split())
Loading…
Cancel
Save