From 6ce11fbdba7d3175012f40f642abbe0987db2bee Mon Sep 17 00:00:00 2001 From: anthony lloyd Date: Mon, 11 Jul 2022 17:05:03 +1000 Subject: [PATCH] slight changes --- utils/subreddit.py | 4 ++-- utils/voice.py | 53 +++++++++++++++++++++++----------------------- 2 files changed, 29 insertions(+), 28 deletions(-) diff --git a/utils/subreddit.py b/utils/subreddit.py index ac3a275..c7e00e9 100644 --- a/utils/subreddit.py +++ b/utils/subreddit.py @@ -34,8 +34,8 @@ def get_subreddit_undone(submissions: list, subreddit): if submission.stickied: print_substep("This post was pinned by moderators. Skipping...") continue - if submission.num_comments == 0: - print_substep("This post has 0 comments. Skipping...") + if submission.num_comments < 100: + print_substep("This post has less than 100 comments. Skipping...") continue return submission print("all submissions have been done going by top submission order") diff --git a/utils/voice.py b/utils/voice.py index 2d06e11..30d882c 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -79,42 +79,43 @@ def sanitize_text(text: str) -> str: regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" profanity = [ - ["a word", r"(?:^|\W)ass(?:$|\W)", r"arse", r"asdf", r"asdf", r"asdf"], - ["b word", r"bastard", r"blow job", r"blowie", r"bitch", r"asdf"], - ["c word", r"cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"], - ["d word", r"dick", r"asdf", r"asdf", r"asdf", r"asdf"], - ["e word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["f word", r"fuck", r"faggot", r"fag", r"asdf", r"asdf"], - ["g word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["h word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["i word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["j word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["k word", r"knob", r"kum", r"koon", r"asdf", r"asdf"], - ["l word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["m word", r"minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"], - ["n word", r"nigga", r"nigger", r"asdf", r"asdf", r"asdf"], - ["o word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["p word", r"pussy", r"piss", r"punani", r"prick", r"asdf"], - ["q word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["r word", r"retard", r"retards", r"asdf", r"asdf", r"asdf"], - ["s word", r"slut", r"shit", r"asdf", r"asdf", r"asdf"], - ["t word", r"twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"], - ["u word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["v word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], - ["w word", r"wanker", r"asdf", r"asdf", r"asdf", r"asdf"], + ["a word", r"(?:^|\W)ass(?:$|\W)", "arse", "asdf", "asdf", "asdf"], + ["b word", "bastard", r"blow job", r"blowie", r"bitch", r"asdf"], + ["c word", "cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"], + ["d word", "dick", r"asdf", r"asdf", r"asdf", r"asdf"], + ["e word", "asdf", r"asdf", r"asdf", r"asdf", r"asdf"], + ["f word", r"fuck", "faggot", "fag", "asdf", "asdf"], + ["g word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["h word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["i word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["j word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["k word", "knob", "kum", r"koon", r"asdf", r"asdf"], + ["l word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["m word", "minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"], + ["n word", "nigga", "nigger", r"asdf", r"asdf", r"asdf"], + ["o word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["p word", "pussy", "piss", "punani", "prick", "asdf"], + ["q word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["r word", "retard", "retards", r"asdf", r"asdf", r"asdf"], + ["s word", "slut", "shit", r"asdf", r"asdf", r"asdf"], + ["t word", "twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"], + ["u word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["v word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"], + ["w word", "wanker", "asdf", "asdf", r"asdf", r"asdf"], ] - result = re.sub(regex_urls, " ", text) + result = re.sub(regex_urls, "url", text) # note: not removing apostrophes - regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" + regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%—“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") + #print(result) for x in range(0, len(profanity)): for y in range(1, len(profanity[0])): # print("row: " + str(x)) # print("column: " + str(y)) - result = re.sub(profanity[x][y], profanity[x][0], result) + result = re.sub(profanity[x][y], profanity[x][0], result, flags=re.I) # print(regex[x][y]) # remove extra whitespace print(result)