[build-site] simplify script

This commit is contained in:
DogmaDragon
2025-01-02 03:01:15 +02:00
committed by feederbox826
parent a3b88ffdc5
commit b6dedaa54d
13 changed files with 604 additions and 14 deletions

View File

@@ -47,12 +47,15 @@ buildScraper()
# always ignore package file
ignore="-x $ignore package"
# For any directory, we want to include the target yml file and all non-yml files
pushd "$dir" > /dev/null
if [ "$dir" != "./scrapers" ]; then
zip -r "$zipfile" . ${ignore} > /dev/null
else
zip "$zipfile" "$scraper_id.yml" > /dev/null
fi
# First zip just the target yml file
zip "$zipfile" "$scraper_id.yml" > /dev/null
# Then find and add all non-yml files in the current directory
find . -type f ! -name "*.yml" -print0 | while read -d $'\0' file; do
zip -g "$zipfile" "$file" ${ignore} > /dev/null
done
popd > /dev/null
# write to spec index
@@ -74,12 +77,8 @@ buildScraper()
echo "" >> "$outdir"/index.yml
}
# find all yml files in ./scrapers - these are packages individually
for f in ./scrapers/*/*.yml; do
buildScraper "$f"
done
find ./scrapers/ -mindepth 3 -name *.yml -print0 | while read -d $'\0' f; do
# skip scrapers in root directory
# Package every yml that lives at least one directory below ./scrapers.
# The glob is quoted so the shell hands it to find verbatim instead of
# expanding it against the current directory; IFS= and -r keep paths with
# leading/trailing whitespace or backslashes intact.
find ./scrapers/ -mindepth 2 -name '*.yml' -print0 | while IFS= read -r -d '' f; do
  buildScraper "$f"
done

View File

@@ -211,11 +211,11 @@ jsonScrapers:
postProcess:
- replace:
- regex: .+Profile\/\d+\/(.+)\/Store.+
with: $1 (ManyVids)
with: "$1 (ManyVids)"
URL:
selector: data.model.profileUrl
postProcess:
- replace:
- regex: ^
with: https://www.manyvids.com
# Last Updated December 31, 2024
# Last Updated December 31, 2024

View File

@@ -39,4 +39,4 @@ jsonScrapers:
- replace:
- regex: (.+)
with: "$1 (ManyVids)"
# Last Updated October 20, 2024
# Last Updated December 27, 2024

View File

@@ -0,0 +1,330 @@
# stdlib
import time
from datetime import datetime
import hashlib
from html import unescape
import json
import logging
import os
from pathlib import Path
import re
import sys
# local modules
from confusables import remove
from oftitle import findTrailerTrigger
# try importing config
import config
# Settings come from config.py when it defines them; otherwise the
# built-in defaults below are used.
stashconfig = getattr(config, 'stashconfig', {
    "scheme": "http",
    "Host":"localhost",
    "Port": "9999",
    "ApiKey": "",
})
# Tag names for the scrape outcome (used by scrape()).
success_tag = getattr(config, 'success_tag', "SHA: Match")
failure_tag = getattr(config, 'failure_tag', "SHA: No Match")
VERSION = "1.6.0"
# Upper bound on generated title length, in characters.
MAX_TITLE_LENGTH = 64
# pip modules
try:
import stashapi.log as log
from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
sys.exit()
try:
import emojis
except ModuleNotFoundError:
log.error("You need to install the emojis module. (https://pypi.org/project/emojis/)")
log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install emojis")
sys.exit()
try:
import requests
except ModuleNotFoundError:
log.error("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)")
log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests")
sys.exit()
try:
from lxml import html
except ModuleNotFoundError:
log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml")
sys.exit()
# calculate sha256
def compute_sha256(file_name):
    """Return the hexadecimal SHA-256 digest of the file at ``file_name``.

    The file is opened in binary mode and consumed in 4096-byte reads so
    the whole file never has to sit in memory at once.
    """
    digest = hashlib.sha256()
    with open(file_name, 'rb') as handle:
        while True:
            block = handle.read(4096)
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()
def sha_file(file):
    """Return the SHA-256 of ``file['path']``, retrying relative to the Stash root.

    The path is tried as given first. If it does not exist, it is joined
    onto the directory two levels above the current working directory and
    tried again. When both attempts fail the script logs an error, prints
    ``null`` and exits.
    """
    try:
        return compute_sha256(file['path'])
    except FileNotFoundError:
        pass
    try:
        log.debug(f"file path: {file['path']}")
        # move up two directories from /scrapers/SHALookup
        two_levels_up = Path.cwd().parent.parent
        return compute_sha256(os.path.join(two_levels_up, file['path']))
    except FileNotFoundError:
        log.error("File not found. Check if the file exists and is accessible.")
        print("null")
        sys.exit()
# get post
# HTTP headers attached to every coomer.su request made by this script.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
'Referer': 'https://coomer.su/search_hash'
}
# define stash globally
# Single shared Stash API client, built from the stashconfig resolved earlier in this file.
stash = StashInterface(stashconfig)
def add_sha256(sha256, oshash):
    """Record ``sha256`` as a fingerprint on the scene file identified by ``oshash``.

    The scene is looked up by oshash; only its first file is considered.
    Nothing is written when that file already reports a sha256 fingerprint.
    """
    scene = stash.find_scene_by_hash(
        {"oshash":oshash},
        fragment='id files { id fingerprint(type:"sha256") } ',
    )
    primary_file = scene["files"][0]
    if not primary_file["fingerprint"]:
        stash.file_set_fingerprints(primary_file["id"], {"type": "sha256", "value":sha256})
def getPostByHash(hash):
    """Find the coomer.su post containing the file with SHA-256 ``hash``.

    Returns the scraped result dict produced by ``splitLookup``, or ``None``
    when the hash is unknown to the search endpoint. Exits the process with
    status 1 when the post itself cannot be fetched. Other HTTP errors from
    the search endpoint are raised via ``raise_for_status``.
    """
    for attempt in range(1, 5):
        shares = requests.get('https://coomer.su/api/v1/search_hash/' + hash, headers=headers, timeout=10)
        # 200 is success and 404 is a definitive "unknown hash"; neither
        # benefits from another attempt.
        if shares.status_code in (200, 404):
            break
        log.debug(f"Request status code: {shares.status_code}")
        time.sleep(2)
    # Handle 404 before raise_for_status(), which would otherwise turn the
    # expected "no match" answer into an exception.
    if shares.status_code == 404:
        log.debug("No results found")
        return None
    shares.raise_for_status()
    data = shares.json()
    posts = data.get('posts') if isinstance(data, dict) else None
    if not posts:
        log.debug("No results found")
        return None
    # construct url to fetch from API
    post = posts[0]
    path = f'https://coomer.su/api/v1/{post["service"]}/user/{post["user"]}/post/{post["id"]}'
    # fetch post
    postres = requests.get(path, headers=headers, timeout=10)
    if postres.status_code == 404:
        log.error("Post not found")
        sys.exit(1)
    elif postres.status_code != 200:
        # requests exposes the numeric code as .status_code (there is no .status)
        log.error(f"Request failed with status code {postres.status_code}")
        sys.exit(1)
    scene = postres.json()["post"]
    return splitLookup(scene, hash)
def splitLookup(scene, hash):
    """Route ``scene`` to the Fansly parser when its service is "fansly", else to the OnlyFans parser."""
    parser = parseFansly if scene['service'] == "fansly" else parseOnlyFans
    return parser(scene, hash)
def searchPerformers(scene):
    """Collect ``@username`` mentions from a post's title and content.

    A mention is an ``@`` at the start of the text or after whitespace,
    followed by word characters, dashes or dots. When the title ends in
    ``..`` and the rest of it appears in the content, it is treated as a
    truncated copy and only the content is searched. Returns the captured
    usernames (without the ``@``) in order of appearance, duplicates
    included.
    """
    title = scene['title']
    body = unescape(scene['content'])
    title_is_truncated_copy = title.endswith('..') and title.removesuffix('..') in body
    if title_is_truncated_copy:
        haystack = body
    else:
        # title carries its own text, so search both
        haystack = title + " " + body
    return re.findall(r"(?:^|\s)@([\w\-\.]+)", unescape(haystack))
# from dolphinfix
def truncate_title(title, max_length):
    """Shorten ``title`` to at most ``max_length`` characters at a natural break.

    Titles already within the limit are returned unchanged. Otherwise the
    title is cut just after the last sentence punctuation mark or emoji that
    lies entirely within the first ``max_length`` characters. If none is
    found it is cut at the last space, and failing that at ``max_length``.
    """
    # Check if the title is already under max length
    if len(title) <= max_length:
        return title
    # NOTE(review): the previous set also held two empty-string entries
    # (possibly symbols lost to an encoding problem - confirm). An empty
    # needle makes rfind() return max_length, which forced a hard cut at
    # max_length + 1 for every title, so empty entries are dropped here.
    punctuation_chars = {'.', '!', '?'}
    punctuation_chars.update(emojis.get(title))
    punctuation_chars.discard('')
    cut_at = -1
    for mark in punctuation_chars:
        found = title.rfind(mark, 0, max_length)
        if found != -1:
            # cut after the whole mark so multi-codepoint emoji stay intact
            cut_at = max(cut_at, found + len(mark))
    if cut_at != -1:
        return title[:cut_at]
    # Find the last space character before max length
    last_space_index = title.rfind(" ", 0, max_length)
    # truncate at last_space_index if valid, else max_length
    title_end = last_space_index if last_space_index != -1 else max_length
    return title[:title_end]
def normalize_title(title):
    """Pass ``title`` through ``confusables.remove`` and trim surrounding whitespace."""
    return remove(title).strip()
# from dolphinfix
def format_title(description, username, date):
    """Build a scene title from the first line of ``description``.

    The first line is normalized and truncated to ``MAX_TITLE_LENGTH``.
    Fallbacks, in order:
      * empty description        -> "<username> - <date>"
      * title of 5 chars or less -> "<title> - <date>"
      * title with no ASCII letters or digits -> shortened title plus " - <date>"
    Otherwise the formatted title is returned unchanged.
    """
    first_line = description.split("\n")[0].strip().replace("<br />", "")
    candidate = truncate_title(normalize_title(first_line), MAX_TITLE_LENGTH)
    if len(description) == 0:
        # no description, return username and date
        return username + " - " + date
    if len(candidate) <= 5:
        # title too short, add date
        return candidate + " - " + date
    if re.search("[A-Za-z0-9]", candidate) is None:
        # textless: leave room for the 13 characters of " - YYYY-MM-DD"
        return truncate_title(candidate, MAX_TITLE_LENGTH - 13) + " - " + date
    return candidate
def parseAPI(scene, hash):
    """Build the service-independent part of a scrape result from an API post.

    Returns ``(result, scene)``. ``result`` holds Details, Date, Performers
    (from @mentions) and empty Studio/Tags for the caller to fill in.
    ``scene`` is mutated: its content is unescaped with ``<br />`` turned
    into newlines, ``scene['total']`` is set to the number of video files in
    the post, and ``scene['part']`` is set to the 1-based position of the
    file whose path contains ``hash``.
    """
    date = datetime.strptime(scene['published'], '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d')
    scene['content'] = unescape(scene['content']).replace("<br />", "\n")
    result = {
        'Details': scene['content'],
        'Date': date,
        'Studio': {},
        'Performers': [],
        'Tags': [],
    }
    # parse usernames
    usernames = searchPerformers(scene)
    log.debug(f"{usernames=}")
    # Strip dots *before* de-duplicating so "@bob" and "@bob." collapse into
    # one performer; dict.fromkeys keeps first-seen order and empty names
    # (a mention consisting only of dots) are skipped.
    stripped = (name.strip('.') for name in usernames)
    for name in dict.fromkeys(n for n in stripped if n):
        result['Performers'].append({'Name': getnamefromalias(name)})
    # figure out multi-part scene
    # create array with file and attachments
    if scene['file']:
        files = [scene['file']] + scene['attachments']
    else:
        files = scene['attachments']
    # only include videos
    files = [file for file in files if file['path'].endswith((".m4v", ".mp4"))]
    # NOTE(review): 'part' is only assigned when some video path contains
    # `hash`, yet parseFansly/parseOnlyFans read it whenever total > 1.
    for i, file in enumerate(files):
        if hash in file['path']:
            scene['part'] = i + 1
    scene['total'] = len(files)
    # add studio in specific function
    return result, scene
# alias search
def getnamefromalias(alias):
    """Resolve ``alias`` to a Stash performer name, or return ``alias`` itself if none matches.

    Queries Stash for performers whose aliases equal ``alias`` (first page,
    up to five results) and uses the first hit.
    """
    matches = stash.find_performers(
        f={"aliases":{"value": alias, "modifier":"EQUALS"}},
        filter={"page":1, "per_page": 5},
        fragment="name",
    )
    log.debug(matches)
    return matches[0]['name'] if len(matches) else alias
def getFanslyUsername(id):
    """Return the profile name coomer.su reports for the Fansly user ``id``.

    Logs an error and exits with status 1 on any non-200 response.
    """
    res = requests.get(f"https://coomer.su/api/v1/fansly/user/{id}/profile", headers=headers, timeout=10)
    if res.status_code != 200:
        # requests exposes the numeric code as .status_code (there is no .status)
        log.error(f"Request failed with status code {res.status_code}")
        sys.exit(1)
    profile = res.json()
    return profile["name"]
# if fansly
def parseFansly(scene, hash):
    """Complete a scrape result for a Fansly post.

    Adds Title (with a "part/total" suffix for multi-video posts), URL,
    Studio and the creator as a performer. Tags the scene as a Trailer when
    the matched file is an attachment whose name contains "preview".
    """
    result, scene = parseAPI(scene, hash)
    # Fansly posts carry a numeric user id; look up the readable username
    username = getFanslyUsername(scene['user'])
    title = format_title(result['Details'], username, result['Date'])
    if scene['total'] > 1:
        title += f" {scene['part']}/{scene['total']}"
    result['Title'] = title
    result['URL'] = f"https://fansly.com/post/{scene['id']}"
    result['Studio']['Name'] = f"{username} (Fansly)"
    result['Performers'].append({ 'Name': getnamefromalias(username) })
    # a hash that matches a preview attachment means this file is the trailer
    matched_preview = any(
        'preview' in attachment['name'] and hash in attachment['path']
        for attachment in scene['attachments']
    )
    if matched_preview:
        result['Tags'].append({ "Name": 'Trailer' })
    return result
# if onlyfans
def parseOnlyFans(scene, hash):
    """Complete a scrape result for an OnlyFans post.

    Adds Title (with a "part/total" suffix for multi-video posts), URL,
    Studio and the creator as a performer. Tags the scene as a Trailer when
    the post text contains one of the trailer trigger phrases.
    """
    result, scene = parseAPI(scene, hash)
    username = scene['user']
    title = format_title(result['Details'], username, result['Date'])
    if scene['total'] > 1:
        title += f" {scene['part']}/{scene['total']}"
    result['Title'] = title
    result['URL'] = f"https://onlyfans.com/{scene['id']}/{username}"
    result['Studio']['Name'] = f"{username} (OnlyFans)"
    result['Performers'].append({ 'Name': getnamefromalias(username) })
    looks_like_trailer = findTrailerTrigger(result['Details'])
    if looks_like_trailer:
        result['Tags'].append({ "Name": 'Trailer' })
    return result
def hash_file(file):
    """Return the SHA-256 for a Stash file record, computing and saving it if absent.

    A sha256 entry already present in ``file['fingerprints']`` is returned
    directly. Otherwise the file is hashed from disk and the digest is
    stored back in Stash, keyed by the file's oshash.
    """
    fingerprints = file['fingerprints']
    sha_entries = [fp for fp in fingerprints if fp['type'] == 'sha256']
    if sha_entries:
        log.debug("Found in fingerprints")
        return sha_entries[0]['value']
    log.debug("Not found in fingerprints")
    oshash_entries = [fp for fp in fingerprints if fp['type'] == 'oshash']
    oshash = oshash_entries[0]['value']
    digest = sha_file(file)
    add_sha256(digest, oshash)
    return digest
def check_video_vertical(scene):
    """Return True when the scene's first file is at least 1.5x taller than wide.

    A file with a missing, ``None`` or zero height/width is reported as not
    vertical, so incomplete metadata cannot abort a successful scrape.
    """
    file = scene['files'][0]
    height = file.get('height')
    width = file.get('width')
    if not height or not width:
        return False
    return height / width >= 1.5
def scrape():
    """Scrape metadata for the scene whose fragment arrives as JSON on stdin.

    Each file of the scene is hashed and looked up until one yields a post.
    On success the result dict is returned with the success tag (and a
    "Vertical Video" tag when applicable). When nothing usable is found the
    failure tag is added to the scene in Stash and ``None`` is returned.
    """
    FRAGMENT = json.loads(sys.stdin.read())
    SCENE_ID = FRAGMENT.get('id')
    # both tags are looked up with create=True, so they exist after this point
    nomatch_id = stash.find_tag(failure_tag, create=True).get('id')
    success_id = stash.find_tag(success_tag, create=True).get('id')
    scene = stash.find_scene(SCENE_ID)
    if not scene:
        log.error("Scene not found - check your config.py file")
        sys.exit(1)

    result = None
    for scene_file in scene['files']:
        digest = hash_file(scene_file)
        log.debug(digest)
        result = getPostByHash(digest)
        if result is not None:
            break

    usable = result is not None and result['Title'] and result['URL']
    if not usable:
        # if no result, add "SHA: No Match tag"
        stash.update_scenes({
            'ids': [SCENE_ID],
            'tag_ids': {
                'mode': 'ADD',
                'ids': [nomatch_id]
            }
        })
        return None

    if check_video_vertical(scene):
        result['Tags'].append({ 'Name': 'Vertical Video' })
    result['Tags'].append({ 'Name': success_tag })
    return result
def main():
    """Run the scrape, print its result as JSON on stdout and report the exit state.

    Any exception is logged through both the Stash logger and the standard
    ``logging`` module before the plugin signals an abnormal exit.
    """
    try:
        payload = json.dumps(scrape())
        print(payload)
        log.exit("Plugin exited normally.")
    except Exception as err:
        log.error(err)
        logging.exception(err)
        log.exit("Plugin exited with an exception.")


if __name__ == '__main__':
    main()
# by Scruffy, feederbox826
# Last Updated 2023-12-14

View File

@@ -0,0 +1,10 @@
name: "SHA256 Lookup"
sceneByFragment:
action: script
script:
- python
# use python3 instead if needed
- SHALookup.py
- query
# Last Updated 2023-12-09

View File

@@ -0,0 +1,82 @@
characters = {
' ': ' ',
'0': '',
'1': '11⓵➊⑴¹𝟏𝟙𝟷𝟣𝟭1➀₁①❶⥠',
'2': '⓶⒉⑵➋ƻ²ᒿ𝟚2𝟮𝟤ᒾ𝟸Ƨ𝟐②ᴤ₂➁❷ᘝƨ',
'3': '³ȝჳⳌꞫ𝟑ℨ𝟛𝟯𝟥Ꝫ➌ЗȜ⓷ӠƷ3𝟹⑶⒊ʒʓǯǮƺ𝕴ᶾзᦡ➂③₃ᶚᴣᴟ❸ҘҙӬӡӭӟӞ',
'4': '𝟰𝟺𝟦𝟒➍ҶᏎ𝟜ҷ⓸ҸҹӴӵᶣ4чㄩ⁴➃₄④❹Ӌ⑷⒋',
'5': '𝟱⓹➎Ƽ𝟓𝟻𝟝𝟧5➄₅⑤⁵❺ƽ⑸⒌',
'6': 'Ⳓ🄇𝟼Ꮾ𝟲𝟞𝟨𝟔➏⓺Ϭϭ⁶б6ᧈ⑥➅₆❻⑹⒍',
'7': '𝟕𝟟𝟩𝟳𝟽🄈⓻𐓒➐7⁷⑦₇❼➆⑺⒎',
'8': '𐌚🄉➑⓼8𝟠𝟪৪⁸₈𝟴➇⑧❽𝟾𝟖⑻⒏',
'9': '൭Ꝯ𝝑𝞋𝟅🄊𝟡𝟵Ⳋ⓽➒੧৭୨9𝟫𝟿𝟗⁹₉Գ➈⑨❾⑼⒐',
'10': '⓾❿➉➓🔟⑩⑽⒑',
'11': '⑪⑾⒒⓫',
'12': '⑫⑿⒓⓬',
'13': '⑬⒀⒔⓭',
'14': '⑭⒁⒕⓮',
'15': '⑮⒂⒖⓯',
'16': '⑯⒃⒗⓰',
'17': '⑰⒄⒘⓱',
'18': '⑱⒅⒙⓲',
'19': '⑲⒆⒚⓳',
'20': '⑳⒇⒛⓴',
'ae': 'æ',
'OE': 'Œ',
'oe': 'œ',
'pi': '',
'Nj': 'Nj',
'AE': '',
'A': '𝑨𝔄ᗄ𝖠𝗔ꓯ𝞐🄐🄰Ꭿ𐊠𝕬𝜜𝐴ꓮᎪ𝚨ꭺ𝝖🅐Å∀🇦₳🅰𝒜𝘈𝐀𝔸дǺᗅⒶAΑᾋᗩĂÃÅǍȀȂĀȺĄʌΛλƛᴀᴬДАልÄₐᕱªǞӒΆẠẢẦẨẬẮẰẲẴẶᾸᾹᾺΆᾼᾈᾉᾊᾌᾍᾎᾏἈἉἊἋἌἍἎἏḀȦǠӐÀÁÂẤẪ𝛢𝓐𝙰𝘼ᗩ',
'a': '∂⍺ⓐձǟᵃᶏ⒜аɒaαȃȁคǎმäɑāɐąᾄẚạảǡầẵḁȧӑӓãåάὰάăẩằẳặᾀᾁᾂᾃᾅᾆᾰᾱᾲᾳᾴᶐᾶᾷἀἁἂἃἄἅἆἇᾇậắàáâấẫǻⱥ𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪⍶',
'B': '🄑𝔙𝖁ꞵ𝛃𝛽𝜷𝝱𝞫Ᏸ𐌁𝑩𝕭🄱𐊡𝖡𝘽ꓐ𝗕𝘉𝜝𐊂𝚩𝐁𝛣𝝗𝐵𝙱𝔹Ᏼᏼ𝞑Ꞵ𝔅🅑฿𝓑ᗿᗾᗽ🅱ⒷBвϐᗷƁ乃ßცჩ๖βɮБՅ๒ᙖʙᴮᵇጌḄℬΒВẞḂḆɃദᗹᗸᵝᙞᙟᙝᛒᙗᙘᴃ🇧',
'b': 'ꮟᏏ𝐛𝘣𝒷𝔟𝓫𝖇𝖻𝑏𝙗𝕓𝒃𝗯𝚋♭ᑳᒈbᖚᕹᕺⓑḃḅҍъḇƃɓƅᖯƄЬᑲþƂ⒝ЪᶀᑿᒀᒂᒁᑾьƀҌѢѣᔎ',
'C': 'ꞆႠ℃🄒ᏟⲤ🄲ꓚ𐊢𐌂🅲𐐕🅒☾ČÇⒸCↃƇᑕㄈ¢८↻ĈϾՇȻᙅᶜ⒞ĆҀĊ©टƆℂℭϹС匚ḈҪʗᑖᑡᑢᑣᑤᑥⅭ𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾ᔍ',
'c': '🝌cⅽ𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌ᴄϲⲥсꮯ𐐽ⲥ𐐽ꮯĉcⓒćčċçҁƈḉȼↄсርᴄϲҫ꒝ςɽϛ𝙲ᑦ᧚𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌₵🇨ᥴᒼⅽ',
'D': '🄓Ꭰ🄳𝔡𝖉𝔻𝗗𝘋𝙳𝐷𝓓𝐃𝑫𝕯𝖣𝔇𝘿ꭰⅅ𝒟ꓓ🅳🅓ⒹDƉᗪƊÐԺᴅᴰↁḊĐÞⅮᗞᑯĎḌḐḒḎᗫᗬᗟᗠᶛᴆ🇩',
'd': 'Ꮷ𝔡𝖉ᑯꓒ𝓭ᵭ₫ԃⓓdḋďḍḑḓḏđƌɖɗᵈ⒟ԁⅾᶁԀᑺᑻᑼᑽᒄᑰᑱᶑ𝕕𝖽𝑑𝘥𝒅𝙙𝐝𝗱𝚍ⅆ𝒹ʠժ',
'E': '£ᙓ⋿∃ⴺꓱ𝐄𝐸𝔈𝕰𝖤𝘌𝙴𝛦𝜠ꭼ🄔🄴𝙀𝔼𐊆𝚬ꓰ𝝚𝞔𝓔𝑬𝗘🅴🅔ⒺΈEƎἝᕮƐモЄᴇᴱᵉÉ乇ЁɆꂅ€ÈℰΕЕⴹᎬĒĔĖĘĚÊËԐỀẾỄỂẼḔḖẺȄȆẸỆȨḜḘḚἘἙἚἛἜῈΈӖὲέЀϵ🇪',
'e': 'əәⅇꬲꞓ⋴𝛆𝛜𝜀𝜖𝜺𝝐𝝴𝞊𝞮𝟄ⲉꮛ𐐩ꞒⲈ⍷𝑒𝓮𝕖𝖊𝘦𝗲𝚎𝙚𝒆𝔢𝖾𝐞Ҿҿⓔe⒠èᧉéᶒêɘἔềếễ૯ǝєεēҽɛểẽḕḗĕėëẻěȅȇẹệȩɇₑęḝḙḛ℮еԑѐӗᥱёἐἑἒἓἕℯ',
'F': 'ᖵꘘꓞꟻᖷ𝐅𝐹𝑭𝔽𝕱𝖥𝗙𝙁𝙵𝟊℉🄕🄵𐊇𝔉𝘍𐊥ꓝꞘ🅵🅕𝓕ⒻFғҒᖴƑԲϝቻḞℱϜ₣🇫Ⅎ',
'f': '𝐟ᵮ𝑓𝒇𝒻𝓯𝔣𝕗𝖿𝗳𝙛𝚏ꬵꞙẝ𝖋ⓕfƒḟʃբᶠ⒡ſꊰʄ∱ᶂ𝘧',
'G': '𝗚𝘎🄖ꓖᏳ🄶Ꮐᏻ𝔾𝓖𝑮𝕲ꮐ𝒢𝙂𝖦𝙶𝔊𝐺𝐆🅶🅖ⒼGɢƓʛĢᘜᴳǴĠԌĜḠĞǦǤԍ₲🇬⅁',
'g': 'ᶃᶢⓖgǵĝḡğġǧģց૭ǥɠﻭﻮᵍ⒢ℊɡᧁ𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐',
'H': 'Ἤ🄗𝆦🄷𝜢ꓧ𝘏𝐻𝝜𝖧𐋏𝗛ꮋℍᎻℌⲎ𝑯𝞖🅷🅗ዞǶԋⒽHĤᚺḢḦȞḤḨḪĦⱧҢңҤῊΉῌἨἩἪἫἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟӉӈҥΉн卄♓𝓗ℋН𝐇𝙃𝙷ʜ𝛨Η𝚮ᕼӇᴴᵸ🇭',
'h': 'ꞕ৸𝕳ꚕᏲℏӊԊꜧᏂҺ⒣ђⓗhĥḣḧȟḥḩḫẖħⱨհһከኩኪካɦℎ𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑իʰᑋᗁɧんɥ',
'I': 'ⲒἿ🄘🄸ЇꀤᏆ🅸🅘إﺇٳأﺃٲٵⒾI៸ÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗェエῘῙῚΊἸἹἺἻἼἽἾⅠΪΊɪᶦᑊᥣ𝛪𝐈𝙄𝙸𝓵𝙡𝐼ᴵ𝚰𝑰🇮',
'i': '⍳ℹⅈ𝑖𝒊𝒾ı𝚤ɩιιͺ𝛊𝜄𝜾𝞲ꙇӏꭵᎥⓘiìíîĩīĭïḯỉǐȉȋịḭῐῑῒΐῖῗἰἱἲⅰⅼ∣ⵏ│׀ا١۱ߊᛁἳἴἵɨіὶίᶖ𝔦𝚒𝝸𝗂𝐢𝕚𝖎𝗶𝘪𝙞ίⁱᵢ𝓲⒤',
'J': '𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹ꞲͿꓙ🄙🄹🅹🅙ⒿJЈʝᒍנフĴʆวلյʖᴊᴶﻝጋɈⱼՂๅႱįᎫȷ丿ℐℑᒘᒙᒚᒛᒴᒵᒎᒏ🇯',
'j': '𝚥ꭻⅉⓙjϳʲ⒥ɉĵǰјڶᶨ𝒿𝘫𝗷𝑗𝙟𝔧𝒋𝗃𝓳𝕛𝚓𝖏𝐣',
'K': '𝐊ꝄꝀ𝐾𝑲𝓚𝕶𝖪𝙺𝚱𝝟🄚𝗞🄺𝜥𝘒ꓗ𝙆𝕂Ⲕ𝔎𝛫Ꮶ𝞙𝒦🅺🅚₭ⓀKĸḰќƘкҠκқҟӄʞҚКҡᴋᴷᵏ⒦ᛕЌጕḲΚKҜҝҞĶḴǨⱩϗӃ🇰',
'k': 'ⓚꝁkḱǩḳķḵƙⱪᶄ𝐤𝘬𝗄𝕜𝜅𝜘𝜿𝝒𝝹𝞌𝞳𝙠𝚔𝑘𝒌ϰ𝛋𝛞𝟆𝗸𝓴𝓀',
'L': '𝐋𝐿𝔏𝕃𝕷𝖫𝗟𝘓𝙇ﴼ🄛🄻𐐛Ⳑ𝑳𝙻𐑃𝓛ⳑꮮᏞꓡ🅻🅛ﺈ└ⓁւLĿᒪ乚ՆʟꓶιԼᴸˡĹረḶₗΓլĻᄂⅬℒⱢᥧᥨᒻᒶᒷᶫﺎᒺᒹᒸᒫ⎳ㄥŁⱠﺄȽ🇱',
'l': 'ⓛlŀĺľḷḹļӀℓḽḻłレɭƚɫⱡ|Ɩ⒧ʅǀוןΙІ|ᶩӏ𝓘𝕀𝖨𝗜𝘐𝐥𝑙𝒍𝓁𝔩𝕝𝖑𝗅𝗹𝘭𝚕𝜤𝝞ı𝚤ɩι𝛊𝜄𝜾𝞲',
'M': 'ꮇ🄜🄼𐌑𐊰ꓟⲘᎷ🅼🅜ⓂMмṂ൱ᗰ州ᘻო๓♏ʍᙏᴍᴹᵐ⒨ḾМṀ௱ⅯℳΜϺᛖӍӎ𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛🇲',
'm': '₥ᵯ𝖒𝐦𝗆𝔪𝕞𝓂ⓜmനᙢ൩ḿṁⅿϻṃጠɱ៳ᶆ𝙢𝓶𝚖𝑚𝗺᧕᧗',
'N': '𝇙𝇚𝇜🄝𝆧𝙉🄽ℕꓠ𝛮𝝢𝙽𝚴𝑵𝑁Ⲛ𝐍𝒩𝞜𝗡𝘕𝜨𝓝𝖭🅽₦🅝ЙЍⓃҋ៷NᴎɴƝᑎ几иՈռИהЛπᴺᶰŃ刀ክṄⁿÑПΝᴨոϖǸŇṆŅṊṈทŊӢӣӤӥћѝйᥢҊᴻ🇳',
'n': 'ոռח𝒏𝓷𝙣𝑛𝖓𝔫𝗇𝚗𝗻ᥒⓝήnǹᴒńñᾗηṅňṇɲņṋṉղຖՌƞŋ⒩ภกɳпʼnлԉȠἠἡῃդᾐᾑᾒᾓᾔᾕᾖῄῆῇῂἢἣἤἥἦἧὴήበቡቢባቤብቦȵ𝛈𝜂𝜼𝝶𝞰𝕟𝘯𝐧𝓃ᶇᵰᥥ∩',
'O': '𝜽⭘🔿ꭴ⭕⏺🄁🄀Ꭴ𝚯𝚹𝛩𝛳𝜣𝜭𝝝𝝧𝞗𝞡ⴱᎾᏫ⍬𝞱𝝷𝛉𝟎𝜃θ𝟘𝑂𝑶𝓞𝔒𝕆𝕺𝗢𝘖𝙊𝛰㈇ꄲ🄞🔾🄾𐊒𝟬ꓳⲞ𐐄𐊫𐓂𝞞🅞⍥◯ⵁ⊖0⊝𝝤Ѳϴ𝚶𝜪ѺӦӨӪΌʘ𝐎ǑÒŎÓÔÕȌȎㇿ❍ⓄOὋロ૦⊕ØФԾΘƠᴼᵒ⒪ŐÖₒ¤◊Φ〇ΟОՕଠഠ௦סỒỐỖỔṌȬṎŌṐṒȮȰȪỎỜỚỠỞỢỌỘǪǬǾƟⵔ߀៰⍜⎔⎕⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃ὈὉὊὌὍ',
'o': 'ంಂംං૦௦۵ℴ𝑜𝒐𝖔ꬽ𝝄𝛔𝜎𝝈𝞂ჿ𝚘০୦ዐ𝛐𝗈𝞼ဝⲟ𝙤၀𐐬𝔬𐓪𝓸🇴⍤○ϙ🅾𝒪𝖮𝟢𝟶𝙾𝘰𝗼𝕠𝜊𝐨𝝾𝞸ᐤⓞѳ᧐ᥲðoఠᦞՓòөӧóºōôǒȏŏồốȍỗổõσṍȭṏὄṑṓȯȫ๏ᴏőöѻоዐǭȱ০୦٥౦೦൦๐໐οօᴑ०੦ỏơờớỡởợọộǫøǿɵծὀὁόὸόὂὃὅ',
'P': '🄟🄿ꓑ𝚸𝙿𝞠𝙋ꮲⲢ𝒫𝝦𝑃𝑷𝗣𝐏𐊕𝜬𝘗𝓟𝖯𝛲Ꮲ🅟Ҏ🅿ⓅPƤᑭ尸Ṗրφքᴘᴾᵖ⒫ṔアקРየᴩⱣℙΡῬᑸᑶᑷᑹᑬᑮ🇵₱',
'p': 'ⲣҏ℗ⓟpṕṗƥᵽῥρрƿǷῤ⍴𝓹𝓅𝐩𝑝𝒑𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝝆𝞺𝜌𝞀',
'Q': '🅀🄠Ꝗ🆀🅠ⓆQℚⵕԚ𝐐𝑄𝑸𝒬𝓠𝚀𝘘𝙌𝖰𝕼𝔔𝗤🇶',
'q': '𝓆ꝗ𝗾ⓠqգ⒬۹զᑫɋɊԛ𝗊𝑞𝘲𝕢𝚚𝒒𝖖𝐪𝔮𝓺𝙦',
'R': '℞🄡℟ꭱᏒ𐒴ꮢᎡꓣ🆁🅡ⓇRᴙȒʀᖇя尺ŔЯરƦᴿዪṚɌʁℛℜℝṘŘȐṜŖṞⱤ𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁ᚱ🇷ᴚ',
'r': '𝚛ꭇᣴℾ𝚪𝛤𝜞𝝘𝞒ⲄГᎱᒥꭈⲅꮁⓡrŕṙřȑȓṛṝŗгՐɾᥬṟɍʳ⒭ɼѓᴦᶉ𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧ᵲґᵣ',
'S': '🅂🄪🄢ꇙ𝓢𝗦Ꮪ𝒮Ꮥ𝚂𝐒ꓢ𝖲𝔖𝙎𐊖𝕾𐐠𝘚𝕊𝑆𝑺🆂🅢ⓈSṨŞֆՏȘˢ⒮ЅṠŠŚṤŜṦṢടᔕᔖᔢᔡᔣᔤ',
's': 'ᣵⓢꜱ𐑈ꮪsśṥŝṡšṧʂṣṩѕşșȿᶊక𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜ގ🇸',
'T': '🅃🄣七ፒ𝜯🆃𐌕𝚻𝛵𝕋𝕿𝑻𐊱𐊗𝖳𝙏🝨𝝩𝞣𝚃𝘛𝑇ꓔ⟙𝐓Ⲧ𝗧⊤𝔗Ꭲꭲ𝒯🅣⏇⏉ⓉTтҬҭƬイŦԵτᴛᵀイፕϮŤ⊥ƮΤТ下ṪṬȚŢṰṮ丅丁ᐪ𝛕𝜏𝝉𝞃𝞽𝓣ㄒ🇹ጥ',
't': 'ⓣtṫẗťṭțȶ੮էʇ†ţṱṯƭŧᵗ⒯ʈեƫ𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝ナ',
'U': '🅄Џ🄤ሀꓴ𐓎꒤🆄🅤ŨŬŮᑗᑘǓǕǗǙⓊUȖᑌ凵ƱմԱꓵЦŪՄƲᙀᵁᵘ⒰ŰપÜՍÙÚÛṸṺǛỦȔƯỪỨỮỬỰỤṲŲṶṴɄᥩᑧ∪ᘮ⋃𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄🇺',
'u': '𝘂𝘶𝙪𝚞ꞟꭎꭒ𝛖𝜐𝝊𝞄𝞾𐓶ὺύⓤuùũūừṷṹŭǖữᥙǚǜὗυΰนսʊǘǔúůᴜűųยûṻцሁüᵾᵤµʋủȕȗưứửựụṳṵʉῠῡῢΰῦῧὐὑϋύὒὓὔὕὖᥔ𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎ᶙ',
'V': '𝑉𝒱𝕍𝗩🄥🅅ꓦ𝑽𝖵𝘝Ꮩ𝚅𝙑𝐕🆅🅥ⓋVᐯѴᵛ⒱۷ṾⅴⅤṼ٧ⴸѶᐺᐻ🇻𝓥',
'v': '∨⌄⋁ⅴ𝐯𝑣𝒗𝓋𝔳𝕧𝖛𝗏ꮩሀⓥv𝜐𝝊ṽṿ౮งѵעᴠνטᵥѷ៴ᘁ𝙫𝚟𝛎𝜈𝝂𝝼𝞶𝘷𝘃𝓿',
'W': '𝐖𝑊𝓦𝔚𝕎𝖂𝖶𝗪𝙒𝚆🄦🅆ᏔᎳ𝑾ꓪ𝒲𝘞🆆Ⓦ🅦wWẂᾧᗯᥕ山ѠຟచաЩШώщฬшᙎᵂʷ⒲ฝሠẄԜẀŴẆẈധᘺѿᙡƜ₩🇼',
'w': '𝐰ꝡ𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠աẁꮃẃⓦ⍵ŵẇẅẘẉⱳὼὠὡὢὣωὤὥὦὧῲῳῴῶῷⱲѡԝᴡώᾠᾡᾢᾣᾤᾥᾦɯ𝝕𝟉𝞏',
'X': 'ꭓꭕ𝛘𝜒𝝌𝞆𝟀ⲭ🞨𝑿𝛸🄧🞩🞪🅇🞫🞬𐌗Ⲭꓫ𝖃𝞦𝘟𐊐𝚾𝝬𝜲Ꭓ𐌢𝖷𝑋𝕏𝔛𐊴𝗫🆇🅧❌Ⓧ𝓧XẊ᙭χㄨ𝒳ӾჯӼҳЖΧҲᵡˣ⒳אሸẌꊼⅩХ╳᙮ᕁᕽⅹᚷⵝ𝙓𝚇乂𝐗🇽',
'x': '᙮ⅹ𝑥𝒙𝓍𝔵𝕩𝖝𝗑𝘅ᕁᕽⓧxхẋ×ₓ⤫⤬⨯ẍᶍ𝙭ӽ𝘹𝐱𝚡⨰メ𝔁',
'Y': '𝒴🄨𝓨𝔜𝖄𝖸𝘠𝙔𝚼𝛶𝝪𝞤УᎩᎽⲨ𝚈𝑌𝗬𝐘ꓬ𝒀𝜰𐊲🆈🅨ⓎYὛƳㄚʏ⅄ϔ¥¥ՎϓγץӲЧЎሃŸɎϤΥϒҮỲÝŶỸȲẎỶỴῨῩῪΎὙὝὟΫΎӮӰҰұ𝕐🇾',
'y': '𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢ʏỿꭚγℽ𝛄𝛾𝜸𝝲𝞬🅈ᎽᎩⓨyỳýŷỹȳẏÿỷуყẙỵƴɏᵞɣʸᶌү⒴ӳӱӯўУʎ',
'Z': '🄩🅉ꓜ𝗭𝐙☡Ꮓ𝘡🆉🅩ⓏZẔƵ乙ẐȤᶻ⒵ŹℤΖŻŽẒⱫ🇿',
'z': '𝑍𝒁𝒵𝓩𝖹𝙕𝚉𝚭𝛧𝜡𝝛𝞕ᵶꮓ𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣ⓩzźẑżžẓẕƶȥɀᴢጊʐⱬᶎʑᙆ'
}

View File

@@ -0,0 +1,9 @@
# Connection details for the Stash instance; the scripts that import this
# module pass the dict to StashInterface.
stashconfig = {
"scheme": "http",
"Host":"localhost",
"Port": "9999",
"ApiKey": "",
}
# Tag names read by SHALookup.py: success_tag is added to a scrape result,
# failure_tag is added to the scene when no post matches.
success_tag = "[SHA: Scraped]"
failure_tag = "[SHA: No Match]"
# NOTE(review): not referenced by any code visible here; presumably toggles
# NFKD unicode normalization - confirm before relying on it.
disable_nfkd = False

View File

@@ -0,0 +1,34 @@
from characters import characters
from util import checkLNP, clean
import math
# Plain key -> string of look-alike characters (a copy of the characters table).
alphabetMap = dict(characters)
# Look-alike character -> plain key. When a character is listed under
# several keys, the key that appears last in the table wins.
confusablesMap = {
    lookalike: plain
    for plain, lookalikes in characters.items()
    for lookalike in lookalikes
}
# Removes confusable unicode characters from a string.
def remove(str):
    """Replace confusable unicode characters in ``str`` with their plain equivalents.

    Strings accepted by ``checkLNP`` are returned untouched. Anything else
    is passed through ``clean`` and then mapped character by character via
    ``confusablesMap``; characters with no (non-empty) mapping are kept.
    """
    if checkLNP(str):
        return str
    return ''.join(confusablesMap.get(char) or char for char in clean(str))
# Randomly mixes up a string with random confusable characters.
def obfuscate(str):
    """Return ``str`` with each mappable character swapped for a random look-alike.

    Characters without an entry in ``alphabetMap`` (or with an empty one)
    are kept unchanged.
    """
    # Local import: the module only imports `math`, which has no random().
    import random

    pieces = []
    for char in str:
        lookalikes = alphabetMap.get(char)
        if lookalikes:
            # random.choice replaces the JavaScript-style
            # math.random() * charMap.length, which raised AttributeError
            pieces.append(random.choice(lookalikes))
        else:
            pieces.append(char)
    return ''.join(pieces)

View File

@@ -0,0 +1,33 @@
# One-off migration: copy sha256 values from the legacy sha-cache.db sqlite
# cache into Stash file fingerprints, then retire the cache file.
import os
import sys

import config

stashconfig = config.stashconfig if hasattr(config, 'stashconfig') else {
    "scheme": "http",
    "Host":"localhost",
    "Port": "9999",
    "ApiKey": "",
}

try:
    import stashapi.log as log
    from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
    print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
    sys.exit(1)

stash = StashInterface(stashconfig)

if os.path.exists("sha-cache.db"):
    # Import only after the existence check: the sqlite module opens
    # sha-cache.db at import time, which would create an empty database and
    # make the check above always succeed.
    from sqlite import db, get_rows

    log.info("migrating sha256 values to fingerprints...")
    for sha256, oshash in get_rows():
        log.info(f"{sha256=} {oshash=}")
        scene = stash.find_scene_by_hash({"oshash":oshash}, fragment='id files { id fingerprint(type:"sha256") } ')
        # scene no longer known to Stash: nothing to attach the hash to
        if not scene or not scene["files"]:
            continue
        # already has a sha256 fingerprint; move on to the next row
        # (the module-level `return` that stood here was a SyntaxError)
        if scene["files"][0]["fingerprint"]:
            continue
        stash.file_set_fingerprints(scene["files"][0]["id"], {"type": "sha256", "value":sha256})
    # release the database handle before renaming the file
    db.close()
    os.rename("sha-cache.db", "sha-cache.db.old")

View File

@@ -0,0 +1,45 @@
import re
# Matches "dm" / "dms" as a whole word, allowing quote-like characters
# between "dm" and the optional "s" (e.g. "DM's").
dmRegex = r"\b(dm)(?:[\'\\\`\"\\”]*)(?:s?)\b"
# Phrases that suggest a post is only a teaser for paid or DM-delivered
# content. Every entry needs its trailing comma: adjacent string literals
# are silently concatenated otherwise.
triggerArray = [
    # DM / in your DMs
    "dm",
    "dms",
    "inbox",
    "messages",
    # sending
    "sending you",
    "sending this",
    # partial video
    "teaser",
    "snippet",
    "entire",
    "full video",
    "full vid",
    "full scene",
    # message prompts
    "with the message",
    "message me",
    "send me",
    # unlocking
    "unlock",
    "receive it",
    "purchase",
    # tipping
    "under this post",
    "tip",
    # rebill
    "rebills",
    "rebillers",
]


def findTrailerTrigger(oftitle):
    """Return True when ``oftitle`` contains a trailer trigger as a whole word or phrase.

    Matching is case-insensitive. The text is checked against ``dmRegex``
    first and then against each entry of ``triggerArray``.
    """
    # check regex
    if re.search(dmRegex, oftitle, re.IGNORECASE):
        return True
    # check other regex array
    for trigger in triggerArray:
        # raw f-string: in a normal string "\b" is a backspace character,
        # not a regex word boundary, so nothing would ever match
        triggerRegex = rf"\b{re.escape(trigger)}\b"
        if re.search(triggerRegex, oftitle, re.IGNORECASE):
            return True
    return False

View File

@@ -0,0 +1,4 @@
emojis
requests
lxml
stashapp-tools>=0.2.40

View File

@@ -0,0 +1,26 @@
import sqlite3
# Module-wide connection and cursor for the legacy sha256 cache database.
db = sqlite3.connect('sha-cache.db')
cursor = db.cursor()


def setup_sqlite():
    """Create the sha_cache table and its oshash index if they do not exist yet."""
    # set up migrations
    schema_statements = (
        """
CREATE TABLE IF NOT EXISTS sha_cache (
sha256 TEXT NOT NULL,
oshash TEXT NOT NULL
);""",
        "CREATE INDEX IF NOT EXISTS oshash_index ON sha_cache (oshash);",
    )
    for statement in schema_statements:
        cursor.execute(statement)
    db.commit()


def add_sha256(sha256, oshash):
    """Insert one (sha256, oshash) pair and commit it."""
    row = (sha256, oshash)
    cursor.execute("INSERT INTO sha_cache VALUES (?, ?)", row)
    db.commit()


def lookup_sha(oshash):
    """Return the first ``(sha256,)`` row stored for ``oshash``, or None if there is none."""
    return cursor.execute("SELECT sha256 FROM sha_cache WHERE oshash = ?", [oshash]).fetchone()


def get_rows():
    """Return every cached ``(sha256, oshash)`` pair as a list of tuples."""
    return cursor.execute("SELECT sha256, oshash FROM sha_cache").fetchall()

18
scrapers/OnlyFans/util.py Normal file
View File

@@ -0,0 +1,18 @@
import re
# @copyright Mathias Bynens <https://mathiasbynens.be/>. MIT license.
regexSymbolWithCombiningMarks = re.compile("([\0-\u02FF\u0370-\u1AAF\u1B00-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uE000-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])([\u0300-\u036F\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]+)")
regexLineBreakCombiningMarks = re.compile("[\0-\x08\x0E-\x1F\x7F-\x84\x86-\x9F\u0300-\u034E\u0350-\u035B\u0363-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u061C\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D4-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D01-\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F7E\u0F80-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u180B-\u180D\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A7F\u1AB0-\u1ABE\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2-\u1CF4\u1CF8\u1CF9\u1DC0-\u1DF5\u1DFB-\u1DFF\u200C\u200E\u200F\u202A-\u202E\u2066-\u206F\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3035\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\uFFF9-\uFFFB]|\uD800[\uDDFD\uDEE0\uDF76-\uDF7A]|\uD802
[\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F\uDEE5\uDEE6]|\uD804[\uDC00-\uDC02\uDC38-\uDC46\uDC7F-\uDC82\uDCB0-\uDCBA\uDD00-\uDD02\uDD27-\uDD34\uDD73\uDD80-\uDD82\uDDB3-\uDDC0\uDDCA-\uDDCC\uDE2C-\uDE37\uDE3E\uDEDF-\uDEEA\uDF00-\uDF03\uDF3C\uDF3E-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF57\uDF62\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDC35-\uDC46\uDCB0-\uDCC3\uDDAF-\uDDB5\uDDB8-\uDDC0\uDDDC\uDDDD\uDE30-\uDE40\uDEAB-\uDEB7]|\uD807[\uDC2F-\uDC36\uDC38-\uDC3F\uDC92-\uDCA7\uDCA9-\uDCB6]|\uD81A[\uDEF0-\uDEF4\uDF30-\uDF36]|\uD81B[\uDF51-\uDF7E\uDF8F-\uDF92]|\uD82F[\uDC9D\uDC9E\uDCA0-\uDCA3]|\uD834[\uDD65-\uDD69\uDD6D-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD836[\uDE00-\uDE36\uDE3B-\uDE6C\uDE75\uDE84\uDE9B-\uDE9F\uDEA1-\uDEAF]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A]|\uD83A[\uDCD0-\uDCD6\uDD44-\uDD4A]|\uDB40[\uDC01\uDC20-\uDC7F\uDD00-\uDDEF]")
checkLNPRegex = re.compile("^(?:[~`!@#%^&*()\{\}\[\];:\"'<,.>?/\\|_+=-]|[a-zA-Z0-9\s])+$")
def checkLNP(str):
    """Match ``str`` against ``checkLNPRegex`` from its start.

    Returns the match object (truthy) when the whole string consists of the
    characters that pattern allows, otherwise ``None``.
    """
    return re.match(checkLNPRegex, str)
def clean(str):
    """Strip combining marks and invisible characters from ``str``.

    Three passes: remove everything matched by
    ``regexLineBreakCombiningMarks``; reduce each symbol followed by
    combining marks to the bare symbol; remove the zero-width and invisible
    characters U+200B-U+200D, U+FEFF and U+2063.
    """
    str = re.sub(regexLineBreakCombiningMarks, '', str)
    # Python backreferences are written \1; the JavaScript-style '$1' used
    # previously inserted the literal text "$1" into the result.
    str = re.sub(regexSymbolWithCombiningMarks, r'\1', str)
    str = re.sub(r'[\u200B-\u200D\uFEFF\u2063]', '', str)
    return str