mirror of https://github.com/BillyOutlast/metadata-scrapers.git
synced 2026-02-04 03:01:16 +01:00
[build-site] simplify script
committed by feederbox826
parent a3b88ffdc5
commit b6dedaa54d
@@ -47,12 +47,15 @@ buildScraper()
    # always ignore package file
    ignore="-x $ignore package"

    # For any directory, we want to include the target yml file and all non-yml files
    pushd "$dir" > /dev/null
    if [ "$dir" != "./scrapers" ]; then
        zip -r "$zipfile" . ${ignore} > /dev/null
    else
        zip "$zipfile" "$scraper_id.yml" > /dev/null
    fi
    # First zip just the target yml file
    zip "$zipfile" "$scraper_id.yml" > /dev/null

    # Then find and add all non-yml files in the current directory
    find . -type f ! -name "*.yml" -print0 | while read -d $'\0' file; do
        zip -g "$zipfile" "$file" ${ignore} > /dev/null
    done
    popd > /dev/null

    # write to spec index
@@ -74,12 +77,8 @@ buildScraper()
    echo "" >> "$outdir"/index.yml
}

# find all yml files in ./scrapers - these are packaged individually
for f in ./scrapers/*/*.yml; do
    buildScraper "$f"
done

find ./scrapers/ -mindepth 3 -name *.yml -print0 | while read -d $'\0' f; do
# skip scrapers in root directory
find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do
    buildScraper "$f"
done
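Note on the hunks above: extraction dropped the +/- diff markers, so the replaced two-step zip (zip the target yml, then append every non-yml file) appears alongside the simpler branch that zips a whole scraper directory with zip -r, per the commit title "simplify script". A minimal Python sketch of the resulting selection rule, with a hypothetical scraper path (the real work stays in the shell script):

# sketch only: list what the simplified build step would package for one scraper
from pathlib import Path

def package_contents(scraper_dir: Path, scraper_id: str) -> list[Path]:
    if not scraper_dir.is_dir():
        return []  # nothing to package
    if scraper_dir.name != "scrapers":
        # whole directory tree, minus the "package" file the script always excludes
        return [p for p in scraper_dir.rglob("*") if p.is_file() and p.name != "package"]
    # a loose yml directly under ./scrapers is packaged on its own
    return [scraper_dir / f"{scraper_id}.yml"]

# hypothetical layout; any directory with a yml and support files works
print(package_contents(Path("./scrapers/SHALookup"), "SHALookup"))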
@@ -211,11 +211,11 @@ jsonScrapers:
      postProcess:
        - replace:
            - regex: .+Profile\/\d+\/(.+)\/Store.+
              with: $1 (ManyVids)
              with: "$1 (ManyVids)"
    URL:
      selector: data.model.profileUrl
      postProcess:
        - replace:
            - regex: ^
              with: https://www.manyvids.com
# Last Updated December 31, 2024
@@ -39,4 +39,4 @@ jsonScrapers:
      - replace:
          - regex: (.+)
            with: "$1 (ManyVids)"
# Last Updated October 20, 2024
# Last Updated December 27, 2024
330 scrapers/OnlyFans/SHALookup.py Normal file
@@ -0,0 +1,330 @@
# stdlib
import time
from datetime import datetime
import hashlib
from html import unescape
import json
import logging
import os
from pathlib import Path
import re
import sys
# local modules
from confusables import remove
from oftitle import findTrailerTrigger

# try importing config
import config
stashconfig = config.stashconfig if hasattr(config, 'stashconfig') else {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}
success_tag = config.success_tag if hasattr(config, 'success_tag') else "SHA: Match"
failure_tag = config.failure_tag if hasattr(config, 'failure_tag') else "SHA: No Match"

VERSION = "1.6.0"
MAX_TITLE_LENGTH = 64

# pip modules
try:
    import stashapi.log as log
    from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
    print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
    sys.exit()
try:
    import emojis
except ModuleNotFoundError:
    log.error("You need to install the emojis module. (https://pypi.org/project/emojis/)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install emojis")
    sys.exit()
try:
    import requests
except ModuleNotFoundError:
    log.error("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests")
    sys.exit()
try:
    from lxml import html
except ModuleNotFoundError:
    log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml")
    sys.exit()

# calculate sha256
def compute_sha256(file_name):
    hash_sha256 = hashlib.sha256()
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()

def sha_file(file):
    try:
        return compute_sha256(file['path'])
    except FileNotFoundError:
        try:
            log.debug(f"file path: {file['path']}")
            # try looking in relative path
            # move up two directories from /scrapers/SHALookup
            newpath = os.path.join(Path.cwd().parent.parent, file['path'])
            return compute_sha256(newpath)
        except FileNotFoundError:
            log.error("File not found. Check if the file exists and is accessible.")
            print("null")
            sys.exit()

# get post
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    'Referer': 'https://coomer.su/search_hash'
}

# define stash globally
stash = StashInterface(stashconfig)

def add_sha256(sha256, oshash):
    scene = stash.find_scene_by_hash({"oshash": oshash}, fragment='id files { id fingerprint(type:"sha256") } ')
    if scene["files"][0]["fingerprint"]:
        return
    stash.file_set_fingerprints(scene["files"][0]["id"], {"type": "sha256", "value": sha256})


def getPostByHash(hash):
    for attempt in range(1, 5):
        shares = requests.get('https://coomer.su/api/v1/search_hash/' + hash, headers=headers, timeout=10)
        if shares.status_code == 200:
            break
        log.debug(f"Request status code: {shares.status_code}")
        time.sleep(2)
    shares.raise_for_status()
    data = shares.json()
    if (shares.status_code == 404 or len(data) == 0):
        log.debug("No results found")
        return None
    # construct url to fetch from API
    post = data['posts'][0]
    path = f'https://coomer.su/api/v1/{post["service"]}/user/{post["user"]}/post/{post["id"]}'
    # fetch post
    postres = requests.get(path, headers=headers)
    if postres.status_code == 404:
        log.error("Post not found")
        sys.exit(1)
    elif not postres.status_code == 200:
        log.error(f"Request failed with status code {postres.status_code}")
        sys.exit(1)
    scene = postres.json()
    scene = scene["post"]
    return splitLookup(scene, hash)

def splitLookup(scene, hash):
    if (scene['service'] == "fansly"):
        return parseFansly(scene, hash)
    else:
        return parseOnlyFans(scene, hash)

def searchPerformers(scene):
    pattern = re.compile(r"(?:^|\s)@([\w\-\.]+)")
    content = unescape(scene['content'])
    # if title is truncated, remove trailing dots and skip searching title
    if scene['title'].endswith('..') and scene['title'].removesuffix('..') in content:
        searchtext = content
    else:
        # if title is unique, search title and content
        searchtext = scene['title'] + " " + content
    usernames = re.findall(pattern, unescape(searchtext))
    return usernames

# from dolphinfix
def truncate_title(title, max_length):
    # Check if the title is already under max length
    if len(title) <= max_length:
        return title
    last_punctuation_index = -1
    punctuation_chars = {'.', '!', '?', '❤', '☺'}
    punctuation_chars.update(emojis.get(title))
    for c in punctuation_chars:
        last_punctuation_index = max(title.rfind(c, 0, max_length), last_punctuation_index)
    if last_punctuation_index != -1:
        return title[:last_punctuation_index+1]
    # Find the last space character before max length
    last_space_index = title.rfind(" ", 0, max_length)
    # truncate at last_space_index if valid, else max_length
    title_end = last_space_index if last_space_index != -1 else max_length
    return title[:title_end]

def normalize_title(title):
    unconfused = remove(title)
    return unconfused.strip()

# from dolphinfix
def format_title(description, username, date):
    firstline = description.split("\n")[0].strip().replace("<br />", "")
    formatted_title = truncate_title(
        normalize_title(firstline), MAX_TITLE_LENGTH
    )
    if not len(description):  # no description, return username and date
        return username + " - " + date
    elif len(formatted_title) <= 5:  # title too short, add date
        return formatted_title + " - " + date
    elif not bool(re.search("[A-Za-z0-9]", formatted_title)):  # textless, truncate and add date
        # decrease MAX_TITLE_LENGTH further to account for " - YYYY-MM-DD"
        return truncate_title(formatted_title, MAX_TITLE_LENGTH - 13) + " - " + date
    else:
        return formatted_title

def parseAPI(scene, hash):
    date = datetime.strptime(scene['published'], '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d')
    result = {}
    scene['content'] = unescape(scene['content']).replace("<br />", "\n")
    # title parsing
    result['Details'] = scene['content']
    result['Date'] = date
    result['Studio'] = {}
    result['Performers'] = []
    result['Tags'] = []
    # parse usernames
    usernames = searchPerformers(scene)
    log.debug(f"{usernames=}")
    for name in list(set(usernames)):
        name = name.strip('.')  # remove trailing full stop
        result['Performers'].append({'Name': getnamefromalias(name)})
    # figure out multi-part scene
    # create array with file and attachments
    if (scene['file']):
        files = [scene['file']] + scene['attachments']
    else:
        files = scene['attachments']
    # only include videos
    files = [file for file in files if file['path'].endswith(".m4v") or file['path'].endswith(".mp4")]
    for i, file in enumerate(files):
        if hash in file['path']:
            scene['part'] = i + 1
    scene['total'] = len(files)
    # add studio in specific function
    return result, scene

# alias search
def getnamefromalias(alias):
    perfs = stash.find_performers(f={"aliases": {"value": alias, "modifier": "EQUALS"}}, filter={"page": 1, "per_page": 5}, fragment="name")
    log.debug(perfs)
    if len(perfs):
        return perfs[0]['name']
    return alias

def getFanslyUsername(id):
    res = requests.get(f"https://coomer.su/api/v1/fansly/user/{id}/profile", headers=headers)
    if not res.status_code == 200:
        log.error(f"Request failed with status code {res.status_code}")
        sys.exit(1)
    profile = res.json()
    return profile["name"]

# if fansly
def parseFansly(scene, hash):
    # fetch scene
    result, scene = parseAPI(scene, hash)
    # look up performer username
    username = getFanslyUsername(scene['user'])
    result['Title'] = format_title(result['Details'], username, result['Date'])
    # add part on afterwards
    if scene['total'] > 1:
        result['Title'] += f" {scene['part']}/{scene['total']}"
    # craft fansly URL
    result['URL'] = f"https://fansly.com/post/{scene['id']}"
    # add studio and performer
    result['Studio']['Name'] = f"{username} (Fansly)"
    result['Performers'].append({'Name': getnamefromalias(username)})
    # Add trailer if hash matches preview
    for attachment in scene['attachments']:
        if 'preview' in attachment['name'] and hash in attachment['path']:
            result['Tags'].append({"Name": 'Trailer'})
            break
    return result

# if onlyfans
def parseOnlyFans(scene, hash):
    # fetch scene
    result, scene = parseAPI(scene, hash)
    username = scene['user']
    result['Title'] = format_title(result['Details'], username, result['Date'])
    # add part on afterwards
    if scene['total'] > 1:
        result['Title'] += f" {scene['part']}/{scene['total']}"
    # craft OnlyFans URL
    result['URL'] = f"https://onlyfans.com/{scene['id']}/{username}"
    # add studio and performer
    result['Studio']['Name'] = f"{username} (OnlyFans)"
    result['Performers'].append({'Name': getnamefromalias(username)})
    # add trailer tag if contains keywords
    if findTrailerTrigger(result['Details']):
        result['Tags'].append({"Name": 'Trailer'})
    return result

def hash_file(file):
    fingerprints = file['fingerprints']
    if sha256_fp := [fp for fp in fingerprints if fp['type'] == 'sha256']:
        log.debug("Found in fingerprints")
        return sha256_fp[0]['value']
    else:
        log.debug("Not found in fingerprints")
        oshash = [fp for fp in fingerprints if fp['type'] == 'oshash'][0]['value']
        sha256 = sha_file(file)
        add_sha256(sha256, oshash)
        return sha256

def check_video_vertical(scene):
    file = scene['files'][0]
    ratio = file['height'] / file['width']
    return ratio >= 1.5

def scrape():
    FRAGMENT = json.loads(sys.stdin.read())
    SCENE_ID = FRAGMENT.get('id')
    nomatch_id = stash.find_tag(failure_tag, create=True).get('id')
    success_id = stash.find_tag(success_tag, create=True).get('id')
    scene = stash.find_scene(SCENE_ID)
    if not scene:
        log.error("Scene not found - check your config.py file")
        sys.exit(1)
    result = None
    for f in scene['files']:
        hash = hash_file(f)
        log.debug(hash)
        result = getPostByHash(hash)
        if result is not None:
            break
    # if no result, add "SHA: No Match" tag
    if (result == None or not result['Title'] or not result['URL']):
        stash.update_scenes({
            'ids': [SCENE_ID],
            'tag_ids': {
                'mode': 'ADD',
                'ids': [nomatch_id]
            }
        })
        return None
    # check if scene is vertical
    if check_video_vertical(scene):
        result['Tags'].append({'Name': 'Vertical Video'})
    # if result, add tag
    result['Tags'].append({'Name': success_tag})
    return result

def main():
    try:
        result = scrape()
        print(json.dumps(result))
        log.exit("Plugin exited normally.")
    except Exception as e:
        log.error(e)
        logging.exception(e)
        log.exit("Plugin exited with an exception.")

if __name__ == '__main__':
    main()

# by Scruffy, feederbox826
# Last Updated 2023-12-14
10 scrapers/OnlyFans/SHALookup.yml Normal file
@@ -0,0 +1,10 @@
name: "SHA256 Lookup"
sceneByFragment:
  action: script
  script:
    - python
    # use python3 instead if needed
    - SHALookup.py
    - query

# Last Updated 2023-12-09
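For context, a sceneByFragment script scraper receives a JSON fragment of the scene on stdin and prints a scraped-scene JSON object (or null) on stdout. A minimal smoke test of that wiring, assuming a reachable Stash instance, the pip modules from requirements.txt, and a real scene id in place of the hypothetical "123":

import json
import subprocess

fragment = json.dumps({"id": "123"})  # hypothetical scene id
proc = subprocess.run(
    ["python", "SHALookup.py", "query"],
    input=fragment, capture_output=True, text=True,
)
print(proc.stdout)  # scraped scene as JSON, or "null" on failure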
82 scrapers/OnlyFans/characters.py Normal file
@@ -0,0 +1,82 @@
characters = {
    ' ': ' ',
    '0': '⓿',
    '1': '11⓵➊⑴¹𝟏𝟙1𝟷𝟣⒈𝟭1➀₁①❶⥠',
    '2': '⓶⒉⑵➋ƻ²ᒿ𝟚2𝟮𝟤ᒾ𝟸Ƨ𝟐②ᴤ₂➁❷ᘝƨ',
    '3': '³ȝჳⳌꞫ𝟑ℨ𝟛𝟯𝟥Ꝫ➌ЗȜ⓷ӠƷ3𝟹⑶⒊ʒʓǯǮƺ𝕴ᶾзᦡ➂③₃ᶚᴣᴟ❸ҘҙӬӡӭӟӞ',
    '4': '𝟰𝟺𝟦𝟒➍ҶᏎ𝟜ҷ⓸ҸҹӴӵᶣ4чㄩ⁴➃₄④❹Ӌ⑷⒋',
    '5': '𝟱⓹➎Ƽ𝟓𝟻𝟝𝟧5➄₅⑤⁵❺ƽ⑸⒌',
    '6': 'Ⳓ🄇𝟼Ꮾ𝟲𝟞𝟨𝟔➏⓺Ϭϭ⁶б6ᧈ⑥➅₆❻⑹⒍',
    '7': '𝟕𝟟𝟩𝟳𝟽🄈⓻𐓒➐7⁷⑦₇❼➆⑺⒎',
    '8': '𐌚🄉➑⓼8𝟠𝟪৪⁸₈𝟴➇⑧❽𝟾𝟖⑻⒏',
    '9': '൭Ꝯ𝝑𝞋𝟅🄊𝟡𝟵Ⳋ⓽➒੧৭୨9𝟫𝟿𝟗⁹₉Գ➈⑨❾⑼⒐',
    '10': '⓾❿➉➓🔟⑩⑽⒑',
    '11': '⑪⑾⒒⓫',
    '12': '⑫⑿⒓⓬',
    '13': '⑬⒀⒔⓭',
    '14': '⑭⒁⒕⓮',
    '15': '⑮⒂⒖⓯',
    '16': '⑯⒃⒗⓰',
    '17': '⑰⒄⒘⓱',
    '18': '⑱⒅⒙⓲',
    '19': '⑲⒆⒚⓳',
    '20': '⑳⒇⒛⓴',
    'ae': 'æ',
    'OE': 'Œ',
    'oe': 'œ',
    'pi': 'ᒆ',
    'Nj': 'Nj',
    'AE': 'ᴁ',
    'A': '𝑨𝔄ᗄ𝖠𝗔ꓯ𝞐🄐🄰Ꭿ𐊠𝕬𝜜𝐴ꓮᎪ𝚨ꭺ𝝖🅐Å∀🇦₳🅰𝒜𝘈𝐀𝔸дǺᗅⒶAΑᾋᗩĂÃÅǍȀȂĀȺĄʌΛλƛᴀᴬДАልÄₐᕱªǞӒΆẠẢẦẨẬẮẰẲẴẶᾸᾹᾺΆᾼᾈᾉᾊᾌᾍᾎᾏἈἉἊἋἌἍἎἏḀȦǠӐÀÁÂẤẪ𝛢𝓐𝙰𝘼ᗩ',
    'a': '∂⍺ⓐձǟᵃᶏ⒜аɒaαȃȁคǎმäɑāɐąᾄẚạảǡầẵḁȧӑӓãåάὰάăẩằẳặᾀᾁᾂᾃᾅᾆᾰᾱᾲᾳᾴᶐᾶᾷἀἁἂἃἄἅἆἇᾇậắàáâấẫǻⱥ𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪⍶',
    'B': '🄑𝔙𝖁ꞵ𝛃𝛽𝜷𝝱𝞫Ᏸ𐌁𝑩𝕭🄱𐊡𝖡𝘽ꓐ𝗕𝘉𝜝𐊂𝚩𝐁𝛣𝝗𝐵𝙱𝔹Ᏼᏼ𝞑Ꞵ𝔅🅑฿𝓑ᗿᗾᗽ🅱ⒷBвϐᗷƁ乃ßცჩ๖βɮБՅ๒ᙖʙᴮᵇጌḄℬΒВẞḂḆɃദᗹᗸᵝᙞᙟᙝᛒᙗᙘᴃ🇧',
    'b': 'ꮟᏏ𝐛𝘣𝒷𝔟𝓫𝖇𝖻𝑏𝙗𝕓𝒃𝗯𝚋♭ᑳᒈbᖚᕹᕺⓑḃḅҍъḇƃɓƅᖯƄЬᑲþƂ⒝ЪᶀᑿᒀᒂᒁᑾьƀҌѢѣᔎ',
    'C': 'ꞆႠ℃🄒ᏟⲤ🄲ꓚ𐊢𐌂🅲𐐕🅒☾ČÇⒸCↃƇᑕㄈ¢८↻ĈϾՇȻᙅᶜ⒞ĆҀĊ©टƆℂℭϹС匚ḈҪʗᑖᑡᑢᑣᑤᑥⅭ𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾ᔍ',
    'c': '🝌cⅽ𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌ᴄϲⲥсꮯ𐐽ⲥ𐐽ꮯĉcⓒćčċçҁƈḉȼↄсርᴄϲҫ꒝ςɽϛ𝙲ᑦ᧚𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌₵🇨ᥴᒼⅽ',
    'D': '🄓Ꭰ🄳𝔡𝖉𝔻𝗗𝘋𝙳𝐷𝓓𝐃𝑫𝕯𝖣𝔇𝘿ꭰⅅ𝒟ꓓ🅳🅓ⒹDƉᗪƊÐԺᴅᴰↁḊĐÞⅮᗞᑯĎḌḐḒḎᗫᗬᗟᗠᶛᴆ🇩',
    'd': 'Ꮷ𝔡𝖉ᑯꓒ𝓭ᵭ₫ԃⓓdḋďḍḑḓḏđƌɖɗᵈ⒟ԁⅾᶁԀᑺᑻᑼᑽᒄᑰᑱᶑ𝕕𝖽𝑑𝘥𝒅𝙙𝐝𝗱𝚍ⅆ𝒹ʠժ',
    'E': '£ᙓ⋿∃ⴺꓱ𝐄𝐸𝔈𝕰𝖤𝘌𝙴𝛦𝜠ꭼ🄔🄴𝙀𝔼𐊆𝚬ꓰ𝝚𝞔𝓔𝑬𝗘🅴🅔ⒺΈEƎἝᕮƐモЄᴇᴱᵉÉ乇ЁɆꂅ€ÈℰΕЕⴹᎬĒĔĖĘĚÊËԐỀẾỄỂẼḔḖẺȄȆẸỆȨḜḘḚἘἙἚἛἜῈΈӖὲέЀϵ🇪',
    'e': 'əәⅇꬲꞓ⋴𝛆𝛜𝜀𝜖𝜺𝝐𝝴𝞊𝞮𝟄ⲉꮛ𐐩ꞒⲈ⍷𝑒𝓮𝕖𝖊𝘦𝗲𝚎𝙚𝒆𝔢𝖾𝐞Ҿҿⓔe⒠èᧉéᶒêɘἔềếễ૯ǝєεēҽɛểẽḕḗĕėëẻěȅȇẹệȩɇₑęḝḙḛ℮еԑѐӗᥱёἐἑἒἓἕℯ',
    'F': 'ᖵꘘꓞꟻᖷ𝐅𝐹𝑭𝔽𝕱𝖥𝗙𝙁𝙵𝟊℉🄕🄵𐊇𝔉𝘍𐊥ꓝꞘ🅵🅕𝓕ⒻFғҒᖴƑԲϝቻḞℱϜ₣🇫Ⅎ',
    'f': '𝐟ᵮ𝑓𝒇𝒻𝓯𝔣𝕗𝖿𝗳𝙛𝚏ꬵꞙẝ𝖋ⓕfƒḟʃբᶠ⒡ſꊰʄ∱ᶂ𝘧',
    'G': '𝗚𝘎🄖ꓖᏳ🄶Ꮐᏻ𝔾𝓖𝑮𝕲ꮐ𝒢𝙂𝖦𝙶𝔊𝐺𝐆🅶🅖ⒼGɢƓʛĢᘜᴳǴĠԌĜḠĞǦǤԍ₲🇬⅁',
    'g': 'ᶃᶢⓖgǵĝḡğġǧģց૭ǥɠﻭﻮᵍ⒢ℊɡᧁ𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐',
    'H': 'Ἤ🄗𝆦🄷𝜢ꓧ𝘏𝐻𝝜𝖧𐋏𝗛ꮋℍᎻℌⲎ𝑯𝞖🅷🅗ዞǶԋⒽHĤᚺḢḦȞḤḨḪĦⱧҢңҤῊΉῌἨἩἪἫἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟӉӈҥΉн卄♓𝓗ℋН𝐇𝙃𝙷ʜ𝛨Η𝚮ᕼӇᴴᵸ🇭',
    'h': 'ꞕ৸𝕳ꚕᏲℏӊԊꜧᏂҺ⒣ђⓗhĥḣḧȟḥḩḫẖħⱨհһከኩኪካɦℎ𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑իʰᑋᗁɧんɥ',
    'I': 'ⲒἿ🄘🄸ЇꀤᏆ🅸🅘إﺇٳأﺃٲٵⒾI៸ÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗェエῘῙῚΊἸἹἺἻἼἽἾⅠΪΊɪᶦᑊᥣ𝛪𝐈𝙄𝙸𝓵𝙡𝐼ᴵ𝚰𝑰🇮',
    'i': '⍳ℹⅈ𝑖𝒊𝒾ı𝚤ɩιιͺ𝛊𝜄𝜾𝞲ꙇӏꭵᎥⓘiìíîĩīĭïḯỉǐȉȋịḭῐῑῒΐῖῗἰἱἲⅰⅼ∣ⵏ│׀ا١۱ߊᛁἳἴἵɨіὶίᶖ𝔦𝚒𝝸𝗂𝐢𝕚𝖎𝗶𝘪𝙞ίⁱᵢ𝓲⒤',
    'J': '𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹ꞲͿꓙ🄙🄹🅹🅙ⒿJЈʝᒍנフĴʆวلյʖᴊᴶﻝጋɈⱼՂๅႱįᎫȷ丿ℐℑᒘᒙᒚᒛᒴᒵᒎᒏ🇯',
    'j': '𝚥ꭻⅉⓙjϳʲ⒥ɉĵǰјڶᶨ𝒿𝘫𝗷𝑗𝙟𝔧𝒋𝗃𝓳𝕛𝚓𝖏𝐣',
    'K': '𝐊ꝄꝀ𝐾𝑲𝓚𝕶𝖪𝙺𝚱𝝟🄚𝗞🄺𝜥𝘒ꓗ𝙆𝕂Ⲕ𝔎𝛫Ꮶ𝞙𝒦🅺🅚₭ⓀKĸḰќƘкҠκқҟӄʞҚКҡᴋᴷᵏ⒦ᛕЌጕḲΚKҜҝҞĶḴǨⱩϗӃ🇰',
    'k': 'ⓚꝁkḱǩḳķḵƙⱪᶄ𝐤𝘬𝗄𝕜𝜅𝜘𝜿𝝒𝝹𝞌𝞳𝙠𝚔𝑘𝒌ϰ𝛋𝛞𝟆𝗸𝓴𝓀',
    'L': '𝐋𝐿𝔏𝕃𝕷𝖫𝗟𝘓𝙇ﴼ🄛🄻𐐛Ⳑ𝑳𝙻𐑃𝓛ⳑꮮᏞꓡ🅻🅛ﺈ└ⓁւLĿᒪ乚ՆʟꓶιԼᴸˡĹረḶₗΓլĻᄂⅬℒⱢᥧᥨᒻᒶᒷᶫﺎᒺᒹᒸᒫ⎳ㄥŁⱠﺄȽ🇱',
    'l': 'ⓛlŀĺľḷḹļӀℓḽḻłレɭƚɫⱡ|Ɩ⒧ʅǀוןΙІ|ᶩӏ𝓘𝕀𝖨𝗜𝘐𝐥𝑙𝒍𝓁𝔩𝕝𝖑𝗅𝗹𝘭𝚕𝜤𝝞ı𝚤ɩι𝛊𝜄𝜾𝞲',
    'M': 'ꮇ🄜🄼𐌑𐊰ꓟⲘᎷ🅼🅜ⓂMмṂ൱ᗰ州ᘻო๓♏ʍᙏᴍᴹᵐ⒨ḾМṀ௱ⅯℳΜϺᛖӍӎ𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛🇲',
    'm': '₥ᵯ𝖒𝐦𝗆𝔪𝕞𝓂ⓜmനᙢ൩ḿṁⅿϻṃጠɱ៳ᶆ𝙢𝓶𝚖𝑚𝗺᧕᧗',
    'N': '𝇙𝇚𝇜🄝𝆧𝙉🄽ℕꓠ𝛮𝝢𝙽𝚴𝑵𝑁Ⲛ𝐍𝒩𝞜𝗡𝘕𝜨𝓝𝖭🅽₦🅝ЙЍⓃҋ៷NᴎɴƝᑎ几иՈռИהЛπᴺᶰŃ刀ክṄⁿÑПΝᴨոϖǸŇṆŅṊṈทŊӢӣӤӥћѝйᥢҊᴻ🇳',
    'n': 'ոռח𝒏𝓷𝙣𝑛𝖓𝔫𝗇𝚗𝗻ᥒⓝήnǹᴒńñᾗηṅňṇɲņṋṉղຖՌƞŋ⒩ภกɳпʼnлԉȠἠἡῃդᾐᾑᾒᾓᾔᾕᾖῄῆῇῂἢἣἤἥἦἧὴήበቡቢባቤብቦȵ𝛈𝜂𝜼𝝶𝞰𝕟𝘯𝐧𝓃ᶇᵰᥥ∩',
    'O': '𝜽⭘🔿ꭴ⭕⏺🄁🄀Ꭴ𝚯𝚹𝛩𝛳𝜣𝜭𝝝𝝧𝞗𝞡ⴱᎾᏫ⍬𝞱𝝷𝛉𝟎𝜃θ𝟘𝑂𝑶𝓞𝔒𝕆𝕺𝗢𝘖𝙊𝛰㈇ꄲ🄞🔾🄾𐊒𝟬ꓳⲞ𐐄𐊫𐓂𝞞🅞⍥◯ⵁ⊖0⊝𝝤Ѳϴ𝚶𝜪ѺӦӨӪΌʘ𝐎ǑÒŎÓÔÕȌȎㇿ❍ⓄOὋロ૦⊕ØФԾΘƠᴼᵒ⒪ŐÖₒ¤◊Φ〇ΟОՕଠഠ௦סỒỐỖỔṌȬṎŌṐṒȮȰȪỎỜỚỠỞỢỌỘǪǬǾƟⵔ߀៰⍜⎔⎕⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃ὈὉὊὌὍ',
    'o': 'ంಂംං૦௦۵ℴ𝑜𝒐𝖔ꬽ𝝄𝛔𝜎𝝈𝞂ჿ𝚘০୦ዐ𝛐𝗈𝞼ဝⲟ𝙤၀𐐬𝔬𐓪𝓸🇴⍤○ϙ🅾𝒪𝖮𝟢𝟶𝙾𝘰𝗼𝕠𝜊𝐨𝝾𝞸ᐤⓞѳ᧐ᥲðoఠᦞՓòөӧóºōôǒȏŏồốȍỗổõσṍȭṏὄṑṓȯȫ๏ᴏőöѻоዐǭȱ০୦٥౦೦൦๐໐οօᴑ०੦ỏơờớỡởợọộǫøǿɵծὀὁόὸόὂὃὅ',
    'P': '🄟🄿ꓑ𝚸𝙿𝞠𝙋ꮲⲢ𝒫𝝦𝑃𝑷𝗣𝐏𐊕𝜬𝘗𝓟𝖯𝛲Ꮲ🅟Ҏ🅿ⓅPƤᑭ尸Ṗրφքᴘᴾᵖ⒫ṔアקРየᴩⱣℙΡῬᑸᑶᑷᑹᑬᑮ🇵₱',
    'p': 'ⲣҏ℗ⓟpṕṗƥᵽῥρрƿǷῤ⍴𝓹𝓅𝐩𝑝𝒑𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝝆𝞺𝜌𝞀',
    'Q': '🅀🄠Ꝗ🆀🅠ⓆQℚⵕԚ𝐐𝑄𝑸𝒬𝓠𝚀𝘘𝙌𝖰𝕼𝔔𝗤🇶',
    'q': '𝓆ꝗ𝗾ⓠqգ⒬۹զᑫɋɊԛ𝗊𝑞𝘲𝕢𝚚𝒒𝖖𝐪𝔮𝓺𝙦',
    'R': '℞🄡℟ꭱᏒ𐒴ꮢᎡꓣ🆁🅡ⓇRᴙȒʀᖇя尺ŔЯરƦᴿዪṚɌʁℛℜℝṘŘȐṜŖṞⱤ𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁ᚱ🇷ᴚ',
    'r': '𝚛ꭇᣴℾ𝚪𝛤𝜞𝝘𝞒ⲄГᎱᒥꭈⲅꮁⓡrŕṙřȑȓṛṝŗгՐɾᥬṟɍʳ⒭ɼѓᴦᶉ𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧ᵲґᵣ',
    'S': '🅂🄪🄢ꇙ𝓢𝗦Ꮪ𝒮Ꮥ𝚂𝐒ꓢ𝖲𝔖𝙎𐊖𝕾𐐠𝘚𝕊𝑆𝑺🆂🅢ⓈSṨŞֆՏȘˢ⒮ЅṠŠŚṤŜṦṢടᔕᔖᔢᔡᔣᔤ',
    's': 'ᣵⓢꜱ𐑈ꮪsśṥŝṡšṧʂṣṩѕşșȿᶊక𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜ގ🇸',
    'T': '🅃🄣七ፒ𝜯🆃𐌕𝚻𝛵𝕋𝕿𝑻𐊱𐊗𝖳𝙏🝨𝝩𝞣𝚃𝘛𝑇ꓔ⟙𝐓Ⲧ𝗧⊤𝔗Ꭲꭲ𝒯🅣⏇⏉ⓉTтҬҭƬイŦԵτᴛᵀイፕϮŤ⊥ƮΤТ下ṪṬȚŢṰṮ丅丁ᐪ𝛕𝜏𝝉𝞃𝞽𝓣ㄒ🇹ጥ',
    't': 'ⓣtṫẗťṭțȶ੮էʇ†ţṱṯƭŧᵗ⒯ʈեƫ𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝ナ',
    'U': '🅄Џ🄤ሀꓴ𐓎꒤🆄🅤ŨŬŮᑗᑘǓǕǗǙⓊUȖᑌ凵ƱմԱꓵЦŪՄƲᙀᵁᵘ⒰ŰપÜՍÙÚÛṸṺǛỦȔƯỪỨỮỬỰỤṲŲṶṴɄᥩᑧ∪ᘮ⋃𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄🇺',
    'u': '𝘂𝘶𝙪𝚞ꞟꭎꭒ𝛖𝜐𝝊𝞄𝞾𐓶ὺύⓤuùũūừṷṹŭǖữᥙǚǜὗυΰนսʊǘǔúůᴜűųยûṻцሁüᵾᵤµʋủȕȗưứửựụṳṵʉῠῡῢΰῦῧὐὑϋύὒὓὔὕὖᥔ𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎ᶙ',
    'V': '𝑉𝒱𝕍𝗩🄥🅅ꓦ𝑽𝖵𝘝Ꮩ𝚅𝙑𝐕🆅🅥ⓋVᐯѴᵛ⒱۷ṾⅴⅤṼ٧ⴸѶᐺᐻ🇻𝓥',
    'v': '∨⌄⋁ⅴ𝐯𝑣𝒗𝓋𝔳𝕧𝖛𝗏ꮩሀⓥv𝜐𝝊ṽṿ౮งѵעᴠνטᵥѷ៴ᘁ𝙫𝚟𝛎𝜈𝝂𝝼𝞶𝘷𝘃𝓿',
    'W': '𝐖𝑊𝓦𝔚𝕎𝖂𝖶𝗪𝙒𝚆🄦🅆ᏔᎳ𝑾ꓪ𝒲𝘞🆆Ⓦ🅦wWẂᾧᗯᥕ山ѠຟచաЩШώщฬшᙎᵂʷ⒲ฝሠẄԜẀŴẆẈധᘺѿᙡƜ₩🇼',
    'w': '𝐰ꝡ𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠աẁꮃẃⓦ⍵ŵẇẅẘẉⱳὼὠὡὢὣωὤὥὦὧῲῳῴῶῷⱲѡԝᴡώᾠᾡᾢᾣᾤᾥᾦɯ𝝕𝟉𝞏',
    'X': 'ꭓꭕ𝛘𝜒𝝌𝞆𝟀ⲭ🞨𝑿𝛸🄧🞩🞪🅇🞫🞬𐌗Ⲭꓫ𝖃𝞦𝘟𐊐𝚾𝝬𝜲Ꭓ𐌢𝖷𝑋𝕏𝔛𐊴𝗫🆇🅧❌Ⓧ𝓧XẊ᙭χㄨ𝒳ӾჯӼҳЖΧҲᵡˣ⒳אሸẌꊼⅩХ╳᙮ᕁᕽⅹᚷⵝ𝙓𝚇乂𝐗🇽',
    'x': '᙮ⅹ𝑥𝒙𝓍𝔵𝕩𝖝𝗑𝘅ᕁᕽⓧxхẋ×ₓ⤫⤬⨯ẍᶍ𝙭ӽ𝘹𝐱𝚡⨰メ𝔁',
    'Y': '𝒴🄨𝓨𝔜𝖄𝖸𝘠𝙔𝚼𝛶𝝪𝞤УᎩᎽⲨ𝚈𝑌𝗬𝐘ꓬ𝒀𝜰𐊲🆈🅨ⓎYὛƳㄚʏ⅄ϔ¥¥ՎϓγץӲЧЎሃŸɎϤΥϒҮỲÝŶỸȲẎỶỴῨῩῪΎὙὝὟΫΎӮӰҰұ𝕐🇾',
    'y': '𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢ʏỿꭚγℽ𝛄𝛾𝜸𝝲𝞬🅈ᎽᎩⓨyỳýŷỹȳẏÿỷуყẙỵƴɏᵞɣʸᶌү⒴ӳӱӯўУʎ',
    'Z': '🄩🅉ꓜ𝗭𝐙☡Ꮓ𝘡🆉🅩ⓏZẔƵ乙ẐȤᶻ⒵ŹℤΖŻŽẒⱫ🇿',
    'z': '𝑍𝒁𝒵𝓩𝖹𝙕𝚉𝚭𝛧𝜡𝝛𝞕ᵶꮓ𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣ⓩzźẑżžẓẕƶȥɀᴢጊʐⱬᶎʑᙆ'
}
9 scrapers/OnlyFans/config.py.example Normal file
@@ -0,0 +1,9 @@
stashconfig = {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}
success_tag = "[SHA: Scraped]"
failure_tag = "[SHA: No Match]"
disable_nfkd = False
34 scrapers/OnlyFans/confusables.py Normal file
@@ -0,0 +1,34 @@
from characters import characters
from util import checkLNP, clean
import random

# The current cache of all the supported alphabet characters
alphabetMap = dict()

# The current cache of all the supported confusable characters
confusablesMap = dict()

for key, value in characters.items():
    alphabetMap[key] = value
    for char in value:
        confusablesMap[char] = key

# Removes confusable unicode characters from a string.
def remove(str):
    if checkLNP(str):
        return str
    newStr = ''
    for char in clean(str):
        newStr += confusablesMap.get(char) or char
    return newStr

# Randomly mixes up a string with random confusable characters.
def obfuscate(str):
    newStr = ''
    for char in str:
        charMap = alphabetMap.get(char)
        if charMap:
            # pick a random confusable variant; random.choice replaces the
            # JavaScript-style math.random()/charMap.length lookup, which is not valid Python
            newStr += random.choice(charMap)
        else:
            newStr += char
    return newStr
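A quick sanity check, not part of the commit: remove() folds lookalike characters back to ASCII, while obfuscate() does the reverse.

from confusables import remove, obfuscate

print(remove("⓵23"))       # -> "123": "⓵" is listed under "1" in characters.py
print(obfuscate("hello"))  # random lookalike spelling, different on each run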
33 scrapers/OnlyFans/migrate_from_db.py Normal file
@@ -0,0 +1,33 @@
import os
import sys
from sqlite import get_rows

import config
stashconfig = config.stashconfig if hasattr(config, 'stashconfig') else {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}

try:
    import stashapi.log as log
    from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
    print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
    sys.exit()


stash = StashInterface(stashconfig)

if os.path.exists("sha-cache.db"):
    log.info("migrating sha256 values to fingerprints...")
    for sha256, oshash in get_rows():
        log.info(f"{sha256=} {oshash=}")

        scene = stash.find_scene_by_hash({"oshash": oshash}, fragment='id files { id fingerprint(type:"sha256") } ')
        if scene["files"][0]["fingerprint"]:
            continue  # already migrated; a bare 'return' is invalid outside a function
        stash.file_set_fingerprints(scene["files"][0]["id"], {"type": "sha256", "value": sha256})

    os.rename("sha-cache.db", "sha-cache.db.old")
45 scrapers/OnlyFans/oftitle.py Normal file
@@ -0,0 +1,45 @@
import re

dmRegex = r"\b(dm)(?:[\'\‘\’\`\"\“\”]*)(?:s?)\b"
triggerArray = [
    # DM / in your DMs
    "dm",
    "dms",
    "inbox",
    "messages",
    # sending
    "sending you",
    "sending this",
    # partial video
    "teaser",
    "snippet",
    "entire",
    "full video",
    "full vid",
    "full scene",
    # message prompts
    "with the message",
    "message me",
    "send me",
    # unlocking
    "unlock",
    "receive it",
    "purchase",
    # tipping
    "under this post",
    "tip",
    # rebill
    "rebills",
    "rebillers",
]

def findTrailerTrigger(oftitle):
    # check regex
    if re.search(dmRegex, oftitle, re.IGNORECASE):
        return True
    # check other regex array
    for trigger in triggerArray:
        # raw f-string so \b stays a word boundary instead of a backspace character
        triggerRegex = rf"\b{trigger}\b"
        if re.search(triggerRegex, oftitle, re.IGNORECASE):
            return True
    return False
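A couple of sanity checks, not part of the commit, showing the kind of promo caption the trigger list is meant to catch:

from oftitle import findTrailerTrigger

print(findTrailerTrigger("Full video in my DMs!"))  # True: "dm" regex and "full video" both hit
print(findTrailerTrigger("Morning yoga session"))   # False: no promo keywords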
4 scrapers/OnlyFans/requirements.txt Normal file
@@ -0,0 +1,4 @@
emojis
requests
lxml
stashapp-tools>=0.2.40
26 scrapers/OnlyFans/sqlite.py Normal file
@@ -0,0 +1,26 @@
import sqlite3

db = sqlite3.connect('sha-cache.db')
cursor = db.cursor()

def setup_sqlite():
    # set up migrations
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS sha_cache (
        sha256 TEXT NOT NULL,
        oshash TEXT NOT NULL
    );""")
    cursor.execute("CREATE INDEX IF NOT EXISTS oshash_index ON sha_cache (oshash);")
    db.commit()

def add_sha256(sha256, oshash):
    cursor.execute("INSERT INTO sha_cache VALUES (?, ?)", (sha256, oshash))
    db.commit()

def lookup_sha(oshash):
    cursor.execute("SELECT sha256 FROM sha_cache WHERE oshash = ?", [oshash])
    return cursor.fetchone()

def get_rows():
    cursor.execute("SELECT sha256, oshash FROM sha_cache")
    return cursor.fetchall()
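Example round-trip through the cache, not part of the commit; note that importing sqlite.py opens (and creates, if missing) sha-cache.db in the working directory. The hash values below are dummies:

from sqlite import setup_sqlite, add_sha256, lookup_sha

setup_sqlite()
add_sha256("deadbeef" * 8, "0011223344556677")  # dummy 64-char sha256 and oshash
print(lookup_sha("0011223344556677"))           # -> ('deadbeefdeadbeef...',)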
18 scrapers/OnlyFans/util.py Normal file
@@ -0,0 +1,18 @@
import re
# @copyright Mathias Bynens <https://mathiasbynens.be/>. MIT license.

regexSymbolWithCombiningMarks = re.compile("([\0-\u02FF\u0370-\u1AAF\u1B00-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uE000-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])([\u0300-\u036F\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]+)")
regexLineBreakCombiningMarks = re.compile("[\0-\x08\x0E-\x1F\x7F-\x84\x86-\x9F\u0300-\u034E\u0350-\u035B\u0363-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u061C\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D4-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D01-\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F7E\u0F80-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u180B-\u180D\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A7F\u1AB0-\u1ABE\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2-\u1CF4\u1CF8\u1CF9\u1DC0-\u1DF5\u1DFB-\u1DFF\u200C\u200E\u200F\u202A-\u202E\u2066-\u206F\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3035\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\uFFF9-\uFFFB]|\uD800[\uDDFD\uDEE0\uDF76-\uDF7A]|\uD802[\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F\uDEE5\uDEE6]|\uD804[\uDC00-\uDC02\uDC38-\uDC46\uDC7F-\uDC82\uDCB0-\uDCBA\uDD00-\uDD02\uDD27-\uDD34\uDD73\uDD80-\uDD82\uDDB3-\uDDC0\uDDCA-\uDDCC\uDE2C-\uDE37\uDE3E\uDEDF-\uDEEA\uDF00-\uDF03\uDF3C\uDF3E-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF57\uDF62\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDC35-\uDC46\uDCB0-\uDCC3\uDDAF-\uDDB5\uDDB8-\uDDC0\uDDDC\uDDDD\uDE30-\uDE40\uDEAB-\uDEB7]|\uD807[\uDC2F-\uDC36\uDC38-\uDC3F\uDC92-\uDCA7\uDCA9-\uDCB6]|\uD81A[\uDEF0-\uDEF4\uDF30-\uDF36]|\uD81B[\uDF51-\uDF7E\uDF8F-\uDF92]|\uD82F[\uDC9D\uDC9E\uDCA0-\uDCA3]|\uD834[\uDD65-\uDD69\uDD6D-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD836[\uDE00-\uDE36\uDE3B-\uDE6C\uDE75\uDE84\uDE9B-\uDE9F\uDEA1-\uDEAF]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A]|\uD83A[\uDCD0-\uDCD6\uDD44-\uDD4A]|\uDB40[\uDC01\uDC20-\uDC7F\uDD00-\uDDEF]")
checkLNPRegex = re.compile("^(?:[~`!@#%^&*()\{\}\[\];:\"'<,.>?/\\|_+=-]|[a-zA-Z0-9\s])+$")

def checkLNP(str):
    return checkLNPRegex.match(str)

"""
Utility function to call 2 other functions which remove Combining Marks/Invisible characters
"""
def clean(str):
    str = re.sub(regexLineBreakCombiningMarks, '', str)
    # backreference must be \1 in Python; the JavaScript-style $1 would be inserted literally
    str = re.sub(regexSymbolWithCombiningMarks, r'\1', str)
    str = re.sub(r'[\u200B-\u200D\uFEFF\u2063]', '', str)
    return str
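A small demonstration of clean(), not part of the commit; it strips zero-width and combining characters that would otherwise survive into scene titles:

from util import clean, checkLNP

print(clean("Ne\u200bw vid\u0301eo"))        # zero-width space and combining acute removed
print(bool(checkLNP("plain ASCII title!")))  # True: nothing to normalize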