slight changes

3 years ago · 6ce11fbdba
parent dc5d95668d
commit 6ce11fbdba
2 changed files with 29 additions and 28 deletions
--- a/utils/subreddit.py
+++ b/utils/subreddit.py
@@ -34,8 +34,8 @@ def get_subreddit_undone(submissions: list, subreddit):
        if submission.stickied:
            print_substep("This post was pinned by moderators. Skipping...")
            continue
-        if submission.num_comments == 0:
+        if submission.num_comments < 100:
-            print_substep("This post has 0 comments. Skipping...")
+            print_substep("This post has less than 100 comments. Skipping...")
            continue
        return submission
    print("all submissions have been done going by top submission order")
--- a/utils/voice.py
+++ b/utils/voice.py
@@ -79,42 +79,43 @@ def sanitize_text(text: str) -> str:
    regex_urls = r"((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*"
    profanity = [
-        ["a word", r"(?:^|\W)ass(?:$|\W)", r"arse", r"asdf", r"asdf", r"asdf"],
+        ["a word", r"(?:^|\W)ass(?:$|\W)", "arse", "asdf", "asdf", "asdf"],
-        ["b word", r"bastard", r"blow job", r"blowie", r"bitch", r"asdf"],
+        ["b word", "bastard", r"blow job", r"blowie", r"bitch", r"asdf"],
-        ["c word", r"cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"],
+        ["c word", "cunt", r"(?:^|\W)cum(?:$|\W)", r"(?:^|\W)coon(?:$|\W)", r"cock", r"clit"],
-        ["d word", r"dick", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["d word", "dick", r"asdf", r"asdf", r"asdf", r"asdf"],
-        ["e word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["e word", "asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
-        ["f word", r"fuck", r"faggot", r"fag", r"asdf", r"asdf"],
+        ["f word", r"fuck", "faggot", "fag", "asdf", "asdf"],
-        ["g word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["g word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["h word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["h word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["i word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["i word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["j word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["j word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["k word", r"knob", r"kum", r"koon", r"asdf", r"asdf"],
+        ["k word", "knob", "kum", r"koon", r"asdf", r"asdf"],
-        ["l word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["l word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["m word", r"minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"],
+        ["m word", "minge", r"(?:^|\W)mong(?:$|\W)", r"motherfucker", r"asdf", r"asdf"],
-        ["n word", r"nigga", r"nigger", r"asdf", r"asdf", r"asdf"],
+        ["n word", "nigga", "nigger", r"asdf", r"asdf", r"asdf"],
-        ["o word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["o word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["p word", r"pussy", r"piss", r"punani", r"prick", r"asdf"],
+        ["p word", "pussy", "piss", "punani", "prick", "asdf"],
-        ["q word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["q word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["r word", r"retard", r"retards", r"asdf", r"asdf", r"asdf"],
+        ["r word", "retard", "retards", r"asdf", r"asdf", r"asdf"],
-        ["s word", r"slut", r"shit", r"asdf", r"asdf", r"asdf"],
+        ["s word", "slut", "shit", r"asdf", r"asdf", r"asdf"],
-        ["t word", r"twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"],
+        ["t word", "twat", r"(?:^|\W)tit(?:$|\W)", r"(?:^|\W)tits(?:$|\W)", r"titties", r"asdf"],
-        ["u word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["u word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["v word", r"asdf", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["v word", "asdf", "asdf", r"asdf", r"asdf", r"asdf"],
-        ["w word", r"wanker", r"asdf", r"asdf", r"asdf", r"asdf"],
+        ["w word", "wanker", "asdf", "asdf", r"asdf", r"asdf"],
    ]
-    result = re.sub(regex_urls, " ", text)
+    result = re.sub(regex_urls, "url", text)
    # note: not removing apostrophes
-    regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
+    regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%—“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
    result = re.sub(regex_expr, " ", result)
    result = result.replace("+", "plus").replace("&", "and")
    #print(result)
    for x in range(0, len(profanity)):
        for y in range(1, len(profanity[0])):
            # print("row: " + str(x))
            # print("column: " + str(y))
-            result = re.sub(profanity[x][y], profanity[x][0], result)
+            result = re.sub(profanity[x][y], profanity[x][0], result, flags=re.I)
            # print(regex[x][y])
    # remove extra whitespace
    print(result)