slight changes

pull/915/head
anthony lloyd 3 years ago
parent dc5d95668d
commit 6ce11fbdba

@ -34,8 +34,8 @@ def get_subreddit_undone(submissions: list, subreddit):
if submission.stickied: if submission.stickied:
print_substep("This post was pinned by moderators. Skipping...") print_substep("This post was pinned by moderators. Skipping...")
continue continue
if submission.num_comments == 0: if submission.num_comments < 100:
print_substep("This post has 0 comments. Skipping...") print_substep("This post has less than 100 comments. Skipping...")
continue continue
return submission return submission
print("all submissions have been done going by top submission order") print("all submissions have been done going by top submission order")

@ -79,42 +79,43 @@ def sanitize_text(text: str) -> str:
regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*" regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
profanity = [ profanity = [
["a word", r"(?:^|\W)ass(?:$|\W)", r"arse", r"asdf", r"asdf", r"asdf"], ["a word", r"(?:^|\W)ass(?:$|\W)", "arse", "asdf", "asdf", "asdf"],
["b word", r"bastard", r"blow job", r"blowie", r"bitch", r"asdf"], ["b word", "bastard", r"blow job", r"blowie", r"bitch", r"asdf"],
["c word", r"cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"], ["c word", "cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"],
["d word", r"dick", r"asdf", r"asdf", r"asdf", r"asdf"], ["d word", "dick", r"asdf", r"asdf", r"asdf", r"asdf"],
["e word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["e word", "asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
["f word", r"fuck", r"faggot", r"fag", r"asdf", r"asdf"], ["f word", r"fuck", "faggot", "fag", "asdf", "asdf"],
["g word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["g word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["h word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["h word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["i word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["i word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["j word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["j word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["k word", r"knob", r"kum", r"koon", r"asdf", r"asdf"], ["k word", "knob", "kum", r"koon", r"asdf", r"asdf"],
["l word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["l word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["m word", r"minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"], ["m word", "minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"],
["n word", r"nigga", r"nigger", r"asdf", r"asdf", r"asdf"], ["n word", "nigga", "nigger", r"asdf", r"asdf", r"asdf"],
["o word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["o word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["p word", r"pussy", r"piss", r"punani", r"prick", r"asdf"], ["p word", "pussy", "piss", "punani", "prick", "asdf"],
["q word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["q word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["r word", r"retard", r"retards", r"asdf", r"asdf", r"asdf"], ["r word", "retard", "retards", r"asdf", r"asdf", r"asdf"],
["s word", r"slut", r"shit", r"asdf", r"asdf", r"asdf"], ["s word", "slut", "shit", r"asdf", r"asdf", r"asdf"],
["t word", r"twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"], ["t word", "twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"],
["u word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["u word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["v word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"], ["v word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
["w word", r"wanker", r"asdf", r"asdf", r"asdf", r"asdf"], ["w word", "wanker", "asdf", "asdf", r"asdf", r"asdf"],
] ]
result = re.sub(regex_urls, " ", text) result = re.sub(regex_urls, "url", text)
# note: not removing apostrophes # note: not removing apostrophes
regex_expr = r"\s['|]|['|]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" regex_expr = r"\s['|]|['|]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
result = re.sub(regex_expr, " ", result) result = re.sub(regex_expr, " ", result)
result = result.replace("+", "plus").replace("&", "and") result = result.replace("+", "plus").replace("&", "and")
#print(result)
for x in range(0, len(profanity)): for x in range(0, len(profanity)):
for y in range(1, len(profanity[0])): for y in range(1, len(profanity[0])):
# print("row: " + str(x)) # print("row: " + str(x))
# print("column: " + str(y)) # print("column: " + str(y))
result = re.sub(profanity[x][y], profanity[x][0], result) result = re.sub(profanity[x][y], profanity[x][0], result, flags=re.I)
# print(regex[x][y]) # print(regex[x][y])
# remove extra whitespace # remove extra whitespace
print(result) print(result)

Loading…
Cancel
Save