mirror of https://github.com/BillyOutlast/metadata-scrapers.git
synced 2026-02-04 03:01:16 +01:00
[build-site] simplify script
committed by feederbox826
parent a3b88ffdc5
commit b6dedaa54d
@@ -47,12 +47,15 @@ buildScraper()
    # always ignore package file
    ignore="-x $ignore package"

    # For any directory, we want to include the target yml file and all non-yml files
    pushd "$dir" > /dev/null
    if [ "$dir" != "./scrapers" ]; then
        zip -r "$zipfile" . ${ignore} > /dev/null
    else
        zip "$zipfile" "$scraper_id.yml" > /dev/null
    fi
    # First zip just the target yml file
    zip "$zipfile" "$scraper_id.yml" > /dev/null

    # Then find and add all non-yml files in the current directory
    find . -type f ! -name "*.yml" -print0 | while read -d $'\0' file; do
        zip -g "$zipfile" "$file" ${ignore} > /dev/null
    done
    popd > /dev/null

    # write to spec index
@@ -74,12 +77,8 @@ buildScraper()
    echo "" >> "$outdir"/index.yml
}

# find all yml files in ./scrapers - these are packaged individually
for f in ./scrapers/*/*.yml; do
    buildScraper "$f"
done

find ./scrapers/ -mindepth 3 -name *.yml -print0 | while read -d $'\0' f; do
# skip scrapers in root directory
find ./scrapers/ -mindepth 2 -name *.yml -print0 | while read -d $'\0' f; do
    buildScraper "$f"
done
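Note on the hunks above: extraction dropped the +/- diff markers, so the replaced two-step zip (zip the target yml, then append every non-yml file) appears alongside the simpler branch that zips a whole scraper directory with zip -r, per the commit title "simplify script". A minimal Python sketch of the resulting selection rule, with a hypothetical scraper path (the real work stays in the shell script):

# sketch only: list what the simplified build step would package for one scraper
from pathlib import Path

def package_contents(scraper_dir: Path, scraper_id: str) -> list[Path]:
    if not scraper_dir.is_dir():
        return []  # nothing to package
    if scraper_dir.name != "scrapers":
        # whole directory tree, minus the "package" file the script always excludes
        return [p for p in scraper_dir.rglob("*") if p.is_file() and p.name != "package"]
    # a loose yml directly under ./scrapers is packaged on its own
    return [scraper_dir / f"{scraper_id}.yml"]

# hypothetical layout; any directory with a yml and support files works
print(package_contents(Path("./scrapers/SHALookup"), "SHALookup"))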
@@ -211,11 +211,11 @@ jsonScrapers:
      postProcess:
        - replace:
            - regex: .+Profile\/\d+\/(.+)\/Store.+
              with: $1 (ManyVids)
              with: "$1 (ManyVids)"
    URL:
      selector: data.model.profileUrl
      postProcess:
        - replace:
            - regex: ^
              with: https://www.manyvids.com
# Last Updated December 31, 2024
@@ -39,4 +39,4 @@ jsonScrapers:
      - replace:
          - regex: (.+)
            with: "$1 (ManyVids)"
# Last Updated October 20, 2024
# Last Updated December 27, 2024
330 scrapers/OnlyFans/SHALookup.py Normal file
@@ -0,0 +1,330 @@
# stdlib
import time
from datetime import datetime
import hashlib
from html import unescape
import json
import logging
import os
from pathlib import Path
import re
import sys
# local modules
from confusables import remove
from oftitle import findTrailerTrigger

# try importing config
import config
stashconfig = config.stashconfig if hasattr(config, 'stashconfig') else {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}
success_tag = config.success_tag if hasattr(config, 'success_tag') else "SHA: Match"
failure_tag = config.failure_tag if hasattr(config, 'failure_tag') else "SHA: No Match"

VERSION = "1.6.0"
MAX_TITLE_LENGTH = 64

# pip modules
try:
    import stashapi.log as log
    from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
    print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
    sys.exit()
try:
    import emojis
except ModuleNotFoundError:
    log.error("You need to install the emojis module. (https://pypi.org/project/emojis/)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install emojis")
    sys.exit()
try:
    import requests
except ModuleNotFoundError:
    log.error("You need to install the requests module. (https://docs.python-requests.org/en/latest/user/install/)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install requests")
    sys.exit()
try:
    from lxml import html
except ModuleNotFoundError:
    log.error("You need to install the lxml module. (https://lxml.de/installation.html#installation)")
    log.error("If you have pip (normally installed with python), run this command in a terminal (cmd): pip install lxml")
    sys.exit()

# calculate sha256
def compute_sha256(file_name):
    hash_sha256 = hashlib.sha256()
    with open(file_name, 'rb') as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_sha256.update(chunk)
    return hash_sha256.hexdigest()

def sha_file(file):
    try:
        return compute_sha256(file['path'])
    except FileNotFoundError:
        try:
            log.debug(f"file path: {file['path']}")
            # try looking in relative path
            # move up two directories from /scrapers/SHALookup
            newpath = os.path.join(Path.cwd().parent.parent, file['path'])
            return compute_sha256(newpath)
        except FileNotFoundError:
            log.error("File not found. Check if the file exists and is accessible.")
            print("null")
            sys.exit()

# get post
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0',
    'Referer': 'https://coomer.su/search_hash'
}

# define stash globally
stash = StashInterface(stashconfig)

def add_sha256(sha256, oshash):
    scene = stash.find_scene_by_hash({"oshash": oshash}, fragment='id files { id fingerprint(type:"sha256") } ')
    if scene["files"][0]["fingerprint"]:
        return
    stash.file_set_fingerprints(scene["files"][0]["id"], {"type": "sha256", "value": sha256})


def getPostByHash(hash):
    for attempt in range(1, 5):
        shares = requests.get('https://coomer.su/api/v1/search_hash/' + hash, headers=headers, timeout=10)
        if shares.status_code == 200:
            break
        log.debug(f"Request status code: {shares.status_code}")
        time.sleep(2)
    shares.raise_for_status()
    data = shares.json()
    if (shares.status_code == 404 or len(data) == 0):
        log.debug("No results found")
        return None
    # construct url to fetch from API
    post = data['posts'][0]
    path = f'https://coomer.su/api/v1/{post["service"]}/user/{post["user"]}/post/{post["id"]}'
    # fetch post
    postres = requests.get(path, headers=headers)
    if postres.status_code == 404:
        log.error("Post not found")
        sys.exit(1)
    elif not postres.status_code == 200:
        log.error(f"Request failed with status code {postres.status_code}")
        sys.exit(1)
    scene = postres.json()
    scene = scene["post"]
    return splitLookup(scene, hash)

def splitLookup(scene, hash):
    if (scene['service'] == "fansly"):
        return parseFansly(scene, hash)
    else:
        return parseOnlyFans(scene, hash)

def searchPerformers(scene):
    pattern = re.compile(r"(?:^|\s)@([\w\-\.]+)")
    content = unescape(scene['content'])
    # if title is truncated, remove trailing dots and skip searching title
    if scene['title'].endswith('..') and scene['title'].removesuffix('..') in content:
        searchtext = content
    else:
        # if title is unique, search title and content
        searchtext = scene['title'] + " " + content
    usernames = re.findall(pattern, unescape(searchtext))
    return usernames

# from dolphinfix
def truncate_title(title, max_length):
    # Check if the title is already under max length
    if len(title) <= max_length:
        return title
    last_punctuation_index = -1
    punctuation_chars = {'.', '!', '?', '❤', '☺'}
    punctuation_chars.update(emojis.get(title))
    for c in punctuation_chars:
        last_punctuation_index = max(title.rfind(c, 0, max_length), last_punctuation_index)
    if last_punctuation_index != -1:
        return title[:last_punctuation_index+1]
    # Find the last space character before max length
    last_space_index = title.rfind(" ", 0, max_length)
    # truncate at last_space_index if valid, else max_length
    title_end = last_space_index if last_space_index != -1 else max_length
    return title[:title_end]

def normalize_title(title):
    unconfused = remove(title)
    return unconfused.strip()

# from dolphinfix
def format_title(description, username, date):
    firstline = description.split("\n")[0].strip().replace("<br />", "")
    formatted_title = truncate_title(
        normalize_title(firstline), MAX_TITLE_LENGTH
    )
    if not len(description):  # no description, return username and date
        return username + " - " + date
    elif len(formatted_title) <= 5:  # title too short, add date
        return formatted_title + " - " + date
    elif not bool(re.search("[A-Za-z0-9]", formatted_title)):  # textless, truncate and add date
        # decrease MAX_TITLE_LENGTH further to account for " - YYYY-MM-DD"
        return truncate_title(formatted_title, MAX_TITLE_LENGTH - 13) + " - " + date
    else:
        return formatted_title

def parseAPI(scene, hash):
    date = datetime.strptime(scene['published'], '%Y-%m-%dT%H:%M:%S').strftime('%Y-%m-%d')
    result = {}
    scene['content'] = unescape(scene['content']).replace("<br />", "\n")
    # title parsing
    result['Details'] = scene['content']
    result['Date'] = date
    result['Studio'] = {}
    result['Performers'] = []
    result['Tags'] = []
    # parse usernames
    usernames = searchPerformers(scene)
    log.debug(f"{usernames=}")
    for name in list(set(usernames)):
        name = name.strip('.')  # remove trailing full stop
        result['Performers'].append({'Name': getnamefromalias(name)})
    # figure out multi-part scene
    # create array with file and attachments
    if (scene['file']):
        files = [scene['file']] + scene['attachments']
    else:
        files = scene['attachments']
    # only include videos
    files = [file for file in files if file['path'].endswith(".m4v") or file['path'].endswith(".mp4")]
    for i, file in enumerate(files):
        if hash in file['path']:
            scene['part'] = i + 1
    scene['total'] = len(files)
    # add studio in specific function
    return result, scene

# alias search
def getnamefromalias(alias):
    perfs = stash.find_performers(f={"aliases": {"value": alias, "modifier": "EQUALS"}}, filter={"page": 1, "per_page": 5}, fragment="name")
    log.debug(perfs)
    if len(perfs):
        return perfs[0]['name']
    return alias

def getFanslyUsername(id):
    res = requests.get(f"https://coomer.su/api/v1/fansly/user/{id}/profile", headers=headers)
    if not res.status_code == 200:
        log.error(f"Request failed with status code {res.status_code}")
        sys.exit(1)
    profile = res.json()
    return profile["name"]

# if fansly
def parseFansly(scene, hash):
    # fetch scene
    result, scene = parseAPI(scene, hash)
    # look up performer username
    username = getFanslyUsername(scene['user'])
    result['Title'] = format_title(result['Details'], username, result['Date'])
    # add part on afterwards
    if scene['total'] > 1:
        result['Title'] += f" {scene['part']}/{scene['total']}"
    # craft fansly URL
    result['URL'] = f"https://fansly.com/post/{scene['id']}"
    # add studio and performer
    result['Studio']['Name'] = f"{username} (Fansly)"
    result['Performers'].append({'Name': getnamefromalias(username)})
    # Add trailer if hash matches preview
    for attachment in scene['attachments']:
        if 'preview' in attachment['name'] and hash in attachment['path']:
            result['Tags'].append({"Name": 'Trailer'})
            break
    return result

# if onlyfans
def parseOnlyFans(scene, hash):
    # fetch scene
    result, scene = parseAPI(scene, hash)
    username = scene['user']
    result['Title'] = format_title(result['Details'], username, result['Date'])
    # add part on afterwards
    if scene['total'] > 1:
        result['Title'] += f" {scene['part']}/{scene['total']}"
    # craft OnlyFans URL
    result['URL'] = f"https://onlyfans.com/{scene['id']}/{username}"
    # add studio and performer
    result['Studio']['Name'] = f"{username} (OnlyFans)"
    result['Performers'].append({'Name': getnamefromalias(username)})
    # add trailer tag if contains keywords
    if findTrailerTrigger(result['Details']):
        result['Tags'].append({"Name": 'Trailer'})
    return result

def hash_file(file):
    fingerprints = file['fingerprints']
    if sha256_fp := [fp for fp in fingerprints if fp['type'] == 'sha256']:
        log.debug("Found in fingerprints")
        return sha256_fp[0]['value']
    else:
        log.debug("Not found in fingerprints")
        oshash = [fp for fp in fingerprints if fp['type'] == 'oshash'][0]['value']
        sha256 = sha_file(file)
        add_sha256(sha256, oshash)
        return sha256

def check_video_vertical(scene):
    file = scene['files'][0]
    ratio = file['height'] / file['width']
    return ratio >= 1.5

def scrape():
    FRAGMENT = json.loads(sys.stdin.read())
    SCENE_ID = FRAGMENT.get('id')
    nomatch_id = stash.find_tag(failure_tag, create=True).get('id')
    success_id = stash.find_tag(success_tag, create=True).get('id')
    scene = stash.find_scene(SCENE_ID)
    if not scene:
        log.error("Scene not found - check your config.py file")
        sys.exit(1)
    result = None
    for f in scene['files']:
        hash = hash_file(f)
        log.debug(hash)
        result = getPostByHash(hash)
        if result is not None:
            break
    # if no result, add "SHA: No Match" tag
    if (result == None or not result['Title'] or not result['URL']):
        stash.update_scenes({
            'ids': [SCENE_ID],
            'tag_ids': {
                'mode': 'ADD',
                'ids': [nomatch_id]
            }
        })
        return None
    # check if scene is vertical
    if check_video_vertical(scene):
        result['Tags'].append({'Name': 'Vertical Video'})
    # if result, add tag
    result['Tags'].append({'Name': success_tag})
    return result

def main():
    try:
        result = scrape()
        print(json.dumps(result))
        log.exit("Plugin exited normally.")
    except Exception as e:
        log.error(e)
        logging.exception(e)
        log.exit("Plugin exited with an exception.")

if __name__ == '__main__':
    main()

# by Scruffy, feederbox826
# Last Updated 2023-12-14
10 scrapers/OnlyFans/SHALookup.yml Normal file
@@ -0,0 +1,10 @@
name: "SHA256 Lookup"
sceneByFragment:
  action: script
  script:
    - python
    # use python3 instead if needed
    - SHALookup.py
    - query

# Last Updated 2023-12-09
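For context, a sceneByFragment script scraper receives a JSON fragment of the scene on stdin and prints a scraped-scene JSON object (or null) on stdout. A minimal smoke test of that wiring, assuming a reachable Stash instance, the pip modules from requirements.txt, and a real scene id in place of the hypothetical "123":

import json
import subprocess

fragment = json.dumps({"id": "123"})  # hypothetical scene id
proc = subprocess.run(
    ["python", "SHALookup.py", "query"],
    input=fragment, capture_output=True, text=True,
)
print(proc.stdout)  # scraped scene as JSON, or "null" on failure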
82 scrapers/OnlyFans/characters.py Normal file
@@ -0,0 +1,82 @@
characters = {
    ' ': ' ',
    '0': '⓿',
    '1': '11⓵➊⑴¹𝟏𝟙1𝟷𝟣⒈𝟭1➀₁①❶⥠',
    '2': '⓶⒉⑵➋ƻ²ᒿ𝟚2𝟮𝟤ᒾ𝟸Ƨ𝟐②ᴤ₂➁❷ᘝƨ',
    '3': '³ȝჳⳌꞫ𝟑ℨ𝟛𝟯𝟥Ꝫ➌ЗȜ⓷ӠƷ3𝟹⑶⒊ʒʓǯǮƺ𝕴ᶾзᦡ➂③₃ᶚᴣᴟ❸ҘҙӬӡӭӟӞ',
    '4': '𝟰𝟺𝟦𝟒➍ҶᏎ𝟜ҷ⓸ҸҹӴӵᶣ4чㄩ⁴➃₄④❹Ӌ⑷⒋',
    '5': '𝟱⓹➎Ƽ𝟓𝟻𝟝𝟧5➄₅⑤⁵❺ƽ⑸⒌',
    '6': 'Ⳓ🄇𝟼Ꮾ𝟲𝟞𝟨𝟔➏⓺Ϭϭ⁶б6ᧈ⑥➅₆❻⑹⒍',
    '7': '𝟕𝟟𝟩𝟳𝟽🄈⓻𐓒➐7⁷⑦₇❼➆⑺⒎',
    '8': '𐌚🄉➑⓼8𝟠𝟪৪⁸₈𝟴➇⑧❽𝟾𝟖⑻⒏',
    '9': '൭Ꝯ𝝑𝞋𝟅🄊𝟡𝟵Ⳋ⓽➒੧৭୨9𝟫𝟿𝟗⁹₉Գ➈⑨❾⑼⒐',
    '10': '⓾❿➉➓🔟⑩⑽⒑',
    '11': '⑪⑾⒒⓫',
    '12': '⑫⑿⒓⓬',
    '13': '⑬⒀⒔⓭',
    '14': '⑭⒁⒕⓮',
    '15': '⑮⒂⒖⓯',
    '16': '⑯⒃⒗⓰',
    '17': '⑰⒄⒘⓱',
    '18': '⑱⒅⒙⓲',
    '19': '⑲⒆⒚⓳',
    '20': '⑳⒇⒛⓴',
    'ae': 'æ',
    'OE': 'Œ',
    'oe': 'œ',
    'pi': 'ᒆ',
    'Nj': 'Nj',
    'AE': 'ᴁ',
    'A': '𝑨𝔄ᗄ𝖠𝗔ꓯ𝞐🄐🄰Ꭿ𐊠𝕬𝜜𝐴ꓮᎪ𝚨ꭺ𝝖🅐Å∀🇦₳🅰𝒜𝘈𝐀𝔸дǺᗅⒶAΑᾋᗩĂÃÅǍȀȂĀȺĄʌΛλƛᴀᴬДАልÄₐᕱªǞӒΆẠẢẦẨẬẮẰẲẴẶᾸᾹᾺΆᾼᾈᾉᾊᾌᾍᾎᾏἈἉἊἋἌἍἎἏḀȦǠӐÀÁÂẤẪ𝛢𝓐𝙰𝘼ᗩ',
    'a': '∂⍺ⓐձǟᵃᶏ⒜аɒaαȃȁคǎმäɑāɐąᾄẚạảǡầẵḁȧӑӓãåάὰάăẩằẳặᾀᾁᾂᾃᾅᾆᾰᾱᾲᾳᾴᶐᾶᾷἀἁἂἃἄἅἆἇᾇậắàáâấẫǻⱥ𝐚𝑎𝒂𝒶𝓪𝔞𝕒𝖆𝖺𝗮𝘢𝙖𝚊𝛂𝛼𝜶𝝰𝞪⍶',
    'B': '🄑𝔙𝖁ꞵ𝛃𝛽𝜷𝝱𝞫Ᏸ𐌁𝑩𝕭🄱𐊡𝖡𝘽ꓐ𝗕𝘉𝜝𐊂𝚩𝐁𝛣𝝗𝐵𝙱𝔹Ᏼᏼ𝞑Ꞵ𝔅🅑฿𝓑ᗿᗾᗽ🅱ⒷBвϐᗷƁ乃ßცჩ๖βɮБՅ๒ᙖʙᴮᵇጌḄℬΒВẞḂḆɃദᗹᗸᵝᙞᙟᙝᛒᙗᙘᴃ🇧',
    'b': 'ꮟᏏ𝐛𝘣𝒷𝔟𝓫𝖇𝖻𝑏𝙗𝕓𝒃𝗯𝚋♭ᑳᒈbᖚᕹᕺⓑḃḅҍъḇƃɓƅᖯƄЬᑲþƂ⒝ЪᶀᑿᒀᒂᒁᑾьƀҌѢѣᔎ',
    'C': 'ꞆႠ℃🄒ᏟⲤ🄲ꓚ𐊢𐌂🅲𐐕🅒☾ČÇⒸCↃƇᑕㄈ¢८↻ĈϾՇȻᙅᶜ⒞ĆҀĊ©टƆℂℭϹС匚ḈҪʗᑖᑡᑢᑣᑤᑥⅭ𝐂𝐶𝑪𝒞𝓒𝕮𝖢𝗖𝘊𝘾ᔍ',
    'c': '🝌cⅽ𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌ᴄϲⲥсꮯ𐐽ⲥ𐐽ꮯĉcⓒćčċçҁƈḉȼↄсርᴄϲҫ꒝ςɽϛ𝙲ᑦ᧚𝐜𝑐𝒄𝒸𝓬𝔠𝕔𝖈𝖼𝗰𝘤𝙘𝚌₵🇨ᥴᒼⅽ',
    'D': '🄓Ꭰ🄳𝔡𝖉𝔻𝗗𝘋𝙳𝐷𝓓𝐃𝑫𝕯𝖣𝔇𝘿ꭰⅅ𝒟ꓓ🅳🅓ⒹDƉᗪƊÐԺᴅᴰↁḊĐÞⅮᗞᑯĎḌḐḒḎᗫᗬᗟᗠᶛᴆ🇩',
    'd': 'Ꮷ𝔡𝖉ᑯꓒ𝓭ᵭ₫ԃⓓdḋďḍḑḓḏđƌɖɗᵈ⒟ԁⅾᶁԀᑺᑻᑼᑽᒄᑰᑱᶑ𝕕𝖽𝑑𝘥𝒅𝙙𝐝𝗱𝚍ⅆ𝒹ʠժ',
    'E': '£ᙓ⋿∃ⴺꓱ𝐄𝐸𝔈𝕰𝖤𝘌𝙴𝛦𝜠ꭼ🄔🄴𝙀𝔼𐊆𝚬ꓰ𝝚𝞔𝓔𝑬𝗘🅴🅔ⒺΈEƎἝᕮƐモЄᴇᴱᵉÉ乇ЁɆꂅ€ÈℰΕЕⴹᎬĒĔĖĘĚÊËԐỀẾỄỂẼḔḖẺȄȆẸỆȨḜḘḚἘἙἚἛἜῈΈӖὲέЀϵ🇪',
    'e': 'əәⅇꬲꞓ⋴𝛆𝛜𝜀𝜖𝜺𝝐𝝴𝞊𝞮𝟄ⲉꮛ𐐩ꞒⲈ⍷𝑒𝓮𝕖𝖊𝘦𝗲𝚎𝙚𝒆𝔢𝖾𝐞Ҿҿⓔe⒠èᧉéᶒêɘἔềếễ૯ǝєεēҽɛểẽḕḗĕėëẻěȅȇẹệȩɇₑęḝḙḛ℮еԑѐӗᥱёἐἑἒἓἕℯ',
    'F': 'ᖵꘘꓞꟻᖷ𝐅𝐹𝑭𝔽𝕱𝖥𝗙𝙁𝙵𝟊℉🄕🄵𐊇𝔉𝘍𐊥ꓝꞘ🅵🅕𝓕ⒻFғҒᖴƑԲϝቻḞℱϜ₣🇫Ⅎ',
    'f': '𝐟ᵮ𝑓𝒇𝒻𝓯𝔣𝕗𝖿𝗳𝙛𝚏ꬵꞙẝ𝖋ⓕfƒḟʃբᶠ⒡ſꊰʄ∱ᶂ𝘧',
    'G': '𝗚𝘎🄖ꓖᏳ🄶Ꮐᏻ𝔾𝓖𝑮𝕲ꮐ𝒢𝙂𝖦𝙶𝔊𝐺𝐆🅶🅖ⒼGɢƓʛĢᘜᴳǴĠԌĜḠĞǦǤԍ₲🇬⅁',
    'g': 'ᶃᶢⓖgǵĝḡğġǧģց૭ǥɠﻭﻮᵍ⒢ℊɡᧁ𝐠𝑔𝒈𝓰𝔤𝕘𝖌𝗀𝗴𝘨𝙜𝚐',
    'H': 'Ἤ🄗𝆦🄷𝜢ꓧ𝘏𝐻𝝜𝖧𐋏𝗛ꮋℍᎻℌⲎ𝑯𝞖🅷🅗ዞǶԋⒽHĤᚺḢḦȞḤḨḪĦⱧҢңҤῊΉῌἨἩἪἫἭἮἯᾘᾙᾚᾛᾜᾝᾞᾟӉӈҥΉн卄♓𝓗ℋН𝐇𝙃𝙷ʜ𝛨Η𝚮ᕼӇᴴᵸ🇭',
    'h': 'ꞕ৸𝕳ꚕᏲℏӊԊꜧᏂҺ⒣ђⓗhĥḣḧȟḥḩḫẖħⱨհһከኩኪካɦℎ𝐡𝒉𝒽𝓱𝔥𝕙𝖍𝗁𝗵𝘩𝙝𝚑իʰᑋᗁɧんɥ',
    'I': 'ⲒἿ🄘🄸ЇꀤᏆ🅸🅘إﺇٳأﺃٲٵⒾI៸ÌÍÎĨĪĬİÏḮỈǏȈȊỊĮḬƗェエῘῙῚΊἸἹἺἻἼἽἾⅠΪΊɪᶦᑊᥣ𝛪𝐈𝙄𝙸𝓵𝙡𝐼ᴵ𝚰𝑰🇮',
    'i': '⍳ℹⅈ𝑖𝒊𝒾ı𝚤ɩιιͺ𝛊𝜄𝜾𝞲ꙇӏꭵᎥⓘiìíîĩīĭïḯỉǐȉȋịḭῐῑῒΐῖῗἰἱἲⅰⅼ∣ⵏ│׀ا١۱ߊᛁἳἴἵɨіὶίᶖ𝔦𝚒𝝸𝗂𝐢𝕚𝖎𝗶𝘪𝙞ίⁱᵢ𝓲⒤',
    'J': '𝐉𝐽𝑱𝒥𝓙𝔍𝕁𝕵𝖩𝗝𝘑𝙅𝙹ꞲͿꓙ🄙🄹🅹🅙ⒿJЈʝᒍנフĴʆวلյʖᴊᴶﻝጋɈⱼՂๅႱįᎫȷ丿ℐℑᒘᒙᒚᒛᒴᒵᒎᒏ🇯',
    'j': '𝚥ꭻⅉⓙjϳʲ⒥ɉĵǰјڶᶨ𝒿𝘫𝗷𝑗𝙟𝔧𝒋𝗃𝓳𝕛𝚓𝖏𝐣',
    'K': '𝐊ꝄꝀ𝐾𝑲𝓚𝕶𝖪𝙺𝚱𝝟🄚𝗞🄺𝜥𝘒ꓗ𝙆𝕂Ⲕ𝔎𝛫Ꮶ𝞙𝒦🅺🅚₭ⓀKĸḰќƘкҠκқҟӄʞҚКҡᴋᴷᵏ⒦ᛕЌጕḲΚKҜҝҞĶḴǨⱩϗӃ🇰',
    'k': 'ⓚꝁkḱǩḳķḵƙⱪᶄ𝐤𝘬𝗄𝕜𝜅𝜘𝜿𝝒𝝹𝞌𝞳𝙠𝚔𝑘𝒌ϰ𝛋𝛞𝟆𝗸𝓴𝓀',
    'L': '𝐋𝐿𝔏𝕃𝕷𝖫𝗟𝘓𝙇ﴼ🄛🄻𐐛Ⳑ𝑳𝙻𐑃𝓛ⳑꮮᏞꓡ🅻🅛ﺈ└ⓁւLĿᒪ乚ՆʟꓶιԼᴸˡĹረḶₗΓլĻᄂⅬℒⱢᥧᥨᒻᒶᒷᶫﺎᒺᒹᒸᒫ⎳ㄥŁⱠﺄȽ🇱',
    'l': 'ⓛlŀĺľḷḹļӀℓḽḻłレɭƚɫⱡ|Ɩ⒧ʅǀוןΙІ|ᶩӏ𝓘𝕀𝖨𝗜𝘐𝐥𝑙𝒍𝓁𝔩𝕝𝖑𝗅𝗹𝘭𝚕𝜤𝝞ı𝚤ɩι𝛊𝜄𝜾𝞲',
    'M': 'ꮇ🄜🄼𐌑𐊰ꓟⲘᎷ🅼🅜ⓂMмṂ൱ᗰ州ᘻო๓♏ʍᙏᴍᴹᵐ⒨ḾМṀ௱ⅯℳΜϺᛖӍӎ𝐌𝑀𝑴𝓜𝔐𝕄𝕸𝖬𝗠𝘔𝙈𝙼𝚳𝛭𝜧𝝡𝞛🇲',
    'm': '₥ᵯ𝖒𝐦𝗆𝔪𝕞𝓂ⓜmനᙢ൩ḿṁⅿϻṃጠɱ៳ᶆ𝙢𝓶𝚖𝑚𝗺᧕᧗',
    'N': '𝇙𝇚𝇜🄝𝆧𝙉🄽ℕꓠ𝛮𝝢𝙽𝚴𝑵𝑁Ⲛ𝐍𝒩𝞜𝗡𝘕𝜨𝓝𝖭🅽₦🅝ЙЍⓃҋ៷NᴎɴƝᑎ几иՈռИהЛπᴺᶰŃ刀ክṄⁿÑПΝᴨոϖǸŇṆŅṊṈทŊӢӣӤӥћѝйᥢҊᴻ🇳',
    'n': 'ոռח𝒏𝓷𝙣𝑛𝖓𝔫𝗇𝚗𝗻ᥒⓝήnǹᴒńñᾗηṅňṇɲņṋṉղຖՌƞŋ⒩ภกɳпʼnлԉȠἠἡῃդᾐᾑᾒᾓᾔᾕᾖῄῆῇῂἢἣἤἥἦἧὴήበቡቢባቤብቦȵ𝛈𝜂𝜼𝝶𝞰𝕟𝘯𝐧𝓃ᶇᵰᥥ∩',
    'O': '𝜽⭘🔿ꭴ⭕⏺🄁🄀Ꭴ𝚯𝚹𝛩𝛳𝜣𝜭𝝝𝝧𝞗𝞡ⴱᎾᏫ⍬𝞱𝝷𝛉𝟎𝜃θ𝟘𝑂𝑶𝓞𝔒𝕆𝕺𝗢𝘖𝙊𝛰㈇ꄲ🄞🔾🄾𐊒𝟬ꓳⲞ𐐄𐊫𐓂𝞞🅞⍥◯ⵁ⊖0⊝𝝤Ѳϴ𝚶𝜪ѺӦӨӪΌʘ𝐎ǑÒŎÓÔÕȌȎㇿ❍ⓄOὋロ૦⊕ØФԾΘƠᴼᵒ⒪ŐÖₒ¤◊Φ〇ΟОՕଠഠ௦סỒỐỖỔṌȬṎŌṐṒȮȰȪỎỜỚỠỞỢỌỘǪǬǾƟⵔ߀៰⍜⎔⎕⦰⦱⦲⦳⦴⦵⦶⦷⦸⦹⦺⦻⦼⦽⦾⦿⧀⧁⧂⧃ὈὉὊὌὍ',
    'o': 'ంಂംං૦௦۵ℴ𝑜𝒐𝖔ꬽ𝝄𝛔𝜎𝝈𝞂ჿ𝚘০୦ዐ𝛐𝗈𝞼ဝⲟ𝙤၀𐐬𝔬𐓪𝓸🇴⍤○ϙ🅾𝒪𝖮𝟢𝟶𝙾𝘰𝗼𝕠𝜊𝐨𝝾𝞸ᐤⓞѳ᧐ᥲðoఠᦞՓòөӧóºōôǒȏŏồốȍỗổõσṍȭṏὄṑṓȯȫ๏ᴏőöѻоዐǭȱ০୦٥౦೦൦๐໐οօᴑ०੦ỏơờớỡởợọộǫøǿɵծὀὁόὸόὂὃὅ',
    'P': '🄟🄿ꓑ𝚸𝙿𝞠𝙋ꮲⲢ𝒫𝝦𝑃𝑷𝗣𝐏𐊕𝜬𝘗𝓟𝖯𝛲Ꮲ🅟Ҏ🅿ⓅPƤᑭ尸Ṗրφքᴘᴾᵖ⒫ṔアקРየᴩⱣℙΡῬᑸᑶᑷᑹᑬᑮ🇵₱',
    'p': 'ⲣҏ℗ⓟpṕṗƥᵽῥρрƿǷῤ⍴𝓹𝓅𝐩𝑝𝒑𝔭𝕡𝖕𝗉𝗽𝘱𝙥𝚙𝛒𝝆𝞺𝜌𝞀',
    'Q': '🅀🄠Ꝗ🆀🅠ⓆQℚⵕԚ𝐐𝑄𝑸𝒬𝓠𝚀𝘘𝙌𝖰𝕼𝔔𝗤🇶',
    'q': '𝓆ꝗ𝗾ⓠqգ⒬۹զᑫɋɊԛ𝗊𝑞𝘲𝕢𝚚𝒒𝖖𝐪𝔮𝓺𝙦',
    'R': '℞🄡℟ꭱᏒ𐒴ꮢᎡꓣ🆁🅡ⓇRᴙȒʀᖇя尺ŔЯરƦᴿዪṚɌʁℛℜℝṘŘȐṜŖṞⱤ𝐑𝑅𝑹𝓡𝕽𝖱𝗥𝘙𝙍𝚁ᚱ🇷ᴚ',
    'r': '𝚛ꭇᣴℾ𝚪𝛤𝜞𝝘𝞒ⲄГᎱᒥꭈⲅꮁⓡrŕṙřȑȓṛṝŗгՐɾᥬṟɍʳ⒭ɼѓᴦᶉ𝐫𝑟𝒓𝓇𝓻𝔯𝕣𝖗𝗋𝗿𝘳𝙧ᵲґᵣ',
    'S': '🅂🄪🄢ꇙ𝓢𝗦Ꮪ𝒮Ꮥ𝚂𝐒ꓢ𝖲𝔖𝙎𐊖𝕾𐐠𝘚𝕊𝑆𝑺🆂🅢ⓈSṨŞֆՏȘˢ⒮ЅṠŠŚṤŜṦṢടᔕᔖᔢᔡᔣᔤ',
    's': 'ᣵⓢꜱ𐑈ꮪsśṥŝṡšṧʂṣṩѕşșȿᶊక𝐬𝑠𝒔𝓈𝓼𝔰𝕤𝖘𝗌𝘀𝘴𝙨𝚜ގ🇸',
    'T': '🅃🄣七ፒ𝜯🆃𐌕𝚻𝛵𝕋𝕿𝑻𐊱𐊗𝖳𝙏🝨𝝩𝞣𝚃𝘛𝑇ꓔ⟙𝐓Ⲧ𝗧⊤𝔗Ꭲꭲ𝒯🅣⏇⏉ⓉTтҬҭƬイŦԵτᴛᵀイፕϮŤ⊥ƮΤТ下ṪṬȚŢṰṮ丅丁ᐪ𝛕𝜏𝝉𝞃𝞽𝓣ㄒ🇹ጥ',
    't': 'ⓣtṫẗťṭțȶ੮էʇ†ţṱṯƭŧᵗ⒯ʈեƫ𝐭𝑡𝒕𝓉𝓽𝔱𝕥𝖙𝗍𝘁𝘵𝙩𝚝ナ',
    'U': '🅄Џ🄤ሀꓴ𐓎꒤🆄🅤ŨŬŮᑗᑘǓǕǗǙⓊUȖᑌ凵ƱմԱꓵЦŪՄƲᙀᵁᵘ⒰ŰપÜՍÙÚÛṸṺǛỦȔƯỪỨỮỬỰỤṲŲṶṴɄᥩᑧ∪ᘮ⋃𝐔𝑈𝑼𝒰𝓤𝔘𝕌𝖀𝖴𝗨𝘜𝙐𝚄🇺',
    'u': '𝘂𝘶𝙪𝚞ꞟꭎꭒ𝛖𝜐𝝊𝞄𝞾𐓶ὺύⓤuùũūừṷṹŭǖữᥙǚǜὗυΰนսʊǘǔúůᴜűųยûṻцሁüᵾᵤµʋủȕȗưứửựụṳṵʉῠῡῢΰῦῧὐὑϋύὒὓὔὕὖᥔ𝐮𝑢𝒖𝓊𝓾𝔲𝕦𝖚𝗎ᶙ',
    'V': '𝑉𝒱𝕍𝗩🄥🅅ꓦ𝑽𝖵𝘝Ꮩ𝚅𝙑𝐕🆅🅥ⓋVᐯѴᵛ⒱۷ṾⅴⅤṼ٧ⴸѶᐺᐻ🇻𝓥',
    'v': '∨⌄⋁ⅴ𝐯𝑣𝒗𝓋𝔳𝕧𝖛𝗏ꮩሀⓥv𝜐𝝊ṽṿ౮งѵעᴠνטᵥѷ៴ᘁ𝙫𝚟𝛎𝜈𝝂𝝼𝞶𝘷𝘃𝓿',
    'W': '𝐖𝑊𝓦𝔚𝕎𝖂𝖶𝗪𝙒𝚆🄦🅆ᏔᎳ𝑾ꓪ𝒲𝘞🆆Ⓦ🅦wWẂᾧᗯᥕ山ѠຟచաЩШώщฬшᙎᵂʷ⒲ฝሠẄԜẀŴẆẈധᘺѿᙡƜ₩🇼',
    'w': '𝐰ꝡ𝑤𝒘𝓌𝔀𝔴𝕨𝖜𝗐𝘄𝘸𝙬𝚠աẁꮃẃⓦ⍵ŵẇẅẘẉⱳὼὠὡὢὣωὤὥὦὧῲῳῴῶῷⱲѡԝᴡώᾠᾡᾢᾣᾤᾥᾦɯ𝝕𝟉𝞏',
    'X': 'ꭓꭕ𝛘𝜒𝝌𝞆𝟀ⲭ🞨𝑿𝛸🄧🞩🞪🅇🞫🞬𐌗Ⲭꓫ𝖃𝞦𝘟𐊐𝚾𝝬𝜲Ꭓ𐌢𝖷𝑋𝕏𝔛𐊴𝗫🆇🅧❌Ⓧ𝓧XẊ᙭χㄨ𝒳ӾჯӼҳЖΧҲᵡˣ⒳אሸẌꊼⅩХ╳᙮ᕁᕽⅹᚷⵝ𝙓𝚇乂𝐗🇽',
    'x': '᙮ⅹ𝑥𝒙𝓍𝔵𝕩𝖝𝗑𝘅ᕁᕽⓧxхẋ×ₓ⤫⤬⨯ẍᶍ𝙭ӽ𝘹𝐱𝚡⨰メ𝔁',
    'Y': '𝒴🄨𝓨𝔜𝖄𝖸𝘠𝙔𝚼𝛶𝝪𝞤УᎩᎽⲨ𝚈𝑌𝗬𝐘ꓬ𝒀𝜰𐊲🆈🅨ⓎYὛƳㄚʏ⅄ϔ¥¥ՎϓγץӲЧЎሃŸɎϤΥϒҮỲÝŶỸȲẎỶỴῨῩῪΎὙὝὟΫΎӮӰҰұ𝕐🇾',
    'y': '𝐲𝑦𝒚𝓎𝔂𝔶𝕪𝖞𝗒𝘆𝘺𝙮𝚢ʏỿꭚγℽ𝛄𝛾𝜸𝝲𝞬🅈ᎽᎩⓨyỳýŷỹȳẏÿỷуყẙỵƴɏᵞɣʸᶌү⒴ӳӱӯўУʎ',
    'Z': '🄩🅉ꓜ𝗭𝐙☡Ꮓ𝘡🆉🅩ⓏZẔƵ乙ẐȤᶻ⒵ŹℤΖŻŽẒⱫ🇿',
    'z': '𝑍𝒁𝒵𝓩𝖹𝙕𝚉𝚭𝛧𝜡𝝛𝞕ᵶꮓ𝐳𝑧𝒛𝓏𝔃𝔷𝕫𝖟𝗓𝘇𝘻𝙯𝚣ⓩzźẑżžẓẕƶȥɀᴢጊʐⱬᶎʑᙆ'
}
9 scrapers/OnlyFans/config.py.example Normal file
@@ -0,0 +1,9 @@
stashconfig = {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}
success_tag = "[SHA: Scraped]"
failure_tag = "[SHA: No Match]"
disable_nfkd = False
34 scrapers/OnlyFans/confusables.py Normal file
@@ -0,0 +1,34 @@
from characters import characters
from util import checkLNP, clean
import random

# The current cache of all the supported alphabet characters
alphabetMap = dict()

# The current cache of all the supported confusable characters
confusablesMap = dict()

for key, value in characters.items():
    alphabetMap[key] = value
    for char in value:
        confusablesMap[char] = key

# Removes confusable unicode characters from a string.
def remove(str):
    if checkLNP(str):
        return str
    newStr = ''
    for char in clean(str):
        newStr += confusablesMap.get(char) or char
    return newStr

# Randomly mixes up a string with random confusable characters.
def obfuscate(str):
    newStr = ''
    for char in str:
        charMap = alphabetMap.get(char)
        if charMap:
            # pick a random confusable variant; random.choice replaces the
            # JavaScript-style math.random()/charMap.length lookup, which is not valid Python
            newStr += random.choice(charMap)
        else:
            newStr += char
    return newStr
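A quick sanity check, not part of the commit: remove() folds lookalike characters back to ASCII, while obfuscate() does the reverse.

from confusables import remove, obfuscate

print(remove("⓵23"))       # -> "123": "⓵" is listed under "1" in characters.py
print(obfuscate("hello"))  # random lookalike spelling, different on each run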
33 scrapers/OnlyFans/migrate_from_db.py Normal file
@@ -0,0 +1,33 @@
import os
import sys
from sqlite import get_rows

import config
stashconfig = config.stashconfig if hasattr(config, 'stashconfig') else {
    "scheme": "http",
    "Host": "localhost",
    "Port": "9999",
    "ApiKey": "",
}

try:
    import stashapi.log as log
    from stashapi.stashapp import StashInterface
except ModuleNotFoundError:
    print("You need to install the stashapp-tools (stashapi) python module. (cmd): pip install stashapp-tools", file=sys.stderr)
    sys.exit()


stash = StashInterface(stashconfig)

if os.path.exists("sha-cache.db"):
    log.info("migrating sha256 values to fingerprints...")
    for sha256, oshash in get_rows():
        log.info(f"{sha256=} {oshash=}")

        scene = stash.find_scene_by_hash({"oshash": oshash}, fragment='id files { id fingerprint(type:"sha256") } ')
        if scene["files"][0]["fingerprint"]:
            continue  # already migrated; a bare 'return' is invalid outside a function
        stash.file_set_fingerprints(scene["files"][0]["id"], {"type": "sha256", "value": sha256})

    os.rename("sha-cache.db", "sha-cache.db.old")
45 scrapers/OnlyFans/oftitle.py Normal file
@@ -0,0 +1,45 @@
import re

dmRegex = r"\b(dm)(?:[\'\‘\’\`\"\“\”]*)(?:s?)\b"
triggerArray = [
    # DM / in your DMs
    "dm",
    "dms",
    "inbox",
    "messages",
    # sending
    "sending you",
    "sending this",
    # partial video
    "teaser",
    "snippet",
    "entire",
    "full video",
    "full vid",
    "full scene",
    # message prompts
    "with the message",
    "message me",
    "send me",
    # unlocking
    "unlock",
    "receive it",
    "purchase",
    # tipping
    "under this post",
    "tip",
    # rebill
    "rebills",
    "rebillers",
]

def findTrailerTrigger(oftitle):
    # check regex
    if re.search(dmRegex, oftitle, re.IGNORECASE):
        return True
    # check other regex array
    for trigger in triggerArray:
        # raw f-string so \b stays a word boundary instead of a backspace character
        triggerRegex = rf"\b{trigger}\b"
        if re.search(triggerRegex, oftitle, re.IGNORECASE):
            return True
    return False
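A couple of sanity checks, not part of the commit, showing the kind of promo caption the trigger list is meant to catch:

from oftitle import findTrailerTrigger

print(findTrailerTrigger("Full video in my DMs!"))  # True: "dm" regex and "full video" both hit
print(findTrailerTrigger("Morning yoga session"))   # False: no promo keywords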
4 scrapers/OnlyFans/requirements.txt Normal file
@@ -0,0 +1,4 @@
emojis
requests
lxml
stashapp-tools>=0.2.40
26 scrapers/OnlyFans/sqlite.py Normal file
@@ -0,0 +1,26 @@
import sqlite3

db = sqlite3.connect('sha-cache.db')
cursor = db.cursor()

def setup_sqlite():
    # set up migrations
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS sha_cache (
        sha256 TEXT NOT NULL,
        oshash TEXT NOT NULL
    );""")
    cursor.execute("CREATE INDEX IF NOT EXISTS oshash_index ON sha_cache (oshash);")
    db.commit()

def add_sha256(sha256, oshash):
    cursor.execute("INSERT INTO sha_cache VALUES (?, ?)", (sha256, oshash))
    db.commit()

def lookup_sha(oshash):
    cursor.execute("SELECT sha256 FROM sha_cache WHERE oshash = ?", [oshash])
    return cursor.fetchone()

def get_rows():
    cursor.execute("SELECT sha256, oshash FROM sha_cache")
    return cursor.fetchall()
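Example round-trip through the cache, not part of the commit; note that importing sqlite.py opens (and creates, if missing) sha-cache.db in the working directory. The hash values below are dummies:

from sqlite import setup_sqlite, add_sha256, lookup_sha

setup_sqlite()
add_sha256("deadbeef" * 8, "0011223344556677")  # dummy 64-char sha256 and oshash
print(lookup_sha("0011223344556677"))           # -> ('deadbeefdeadbeef...',)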
18 scrapers/OnlyFans/util.py Normal file
@@ -0,0 +1,18 @@
import re
# @copyright Mathias Bynens <https://mathiasbynens.be/>. MIT license.

regexSymbolWithCombiningMarks = re.compile("([\0-\u02FF\u0370-\u1AAF\u1B00-\u1DBF\u1E00-\u20CF\u2100-\uD7FF\uE000-\uFE1F\uFE30-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF])([\u0300-\u036F\u1AB0-\u1AFF\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]+)")
regexLineBreakCombiningMarks = re.compile("[\0-\x08\x0E-\x1F\x7F-\x84\x86-\x9F\u0300-\u034E\u0350-\u035B\u0363-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u061C\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08D4-\u08E1\u08E3-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D01-\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F7E\u0F80-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u180B-\u180D\u1885\u1886\u18A9\u1920-\u192B\u1930-\u193B\u1A17-\u1A1B\u1A7F\u1AB0-\u1ABE\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2-\u1CF4\u1CF8\u1CF9\u1DC0-\u1DF5\u1DFB-\u1DFF\u200C\u200E\u200F\u202A-\u202E\u2066-\u206F\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3035\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69E\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C5\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2F\uFFF9-\uFFFB]|\uD800[\uDDFD\uDEE0\uDF76-\uDF7A]|\uD802[\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F\uDEE5\uDEE6]|\uD804[\uDC00-\uDC02\uDC38-\uDC46\uDC7F-\uDC82\uDCB0-\uDCBA\uDD00-\uDD02\uDD27-\uDD34\uDD73\uDD80-\uDD82\uDDB3-\uDDC0\uDDCA-\uDDCC\uDE2C-\uDE37\uDE3E\uDEDF-\uDEEA\uDF00-\uDF03\uDF3C\uDF3E-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF57\uDF62\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDC35-\uDC46\uDCB0-\uDCC3\uDDAF-\uDDB5\uDDB8-\uDDC0\uDDDC\uDDDD\uDE30-\uDE40\uDEAB-\uDEB7]|\uD807[\uDC2F-\uDC36\uDC38-\uDC3F\uDC92-\uDCA7\uDCA9-\uDCB6]|\uD81A[\uDEF0-\uDEF4\uDF30-\uDF36]|\uD81B[\uDF51-\uDF7E\uDF8F-\uDF92]|\uD82F[\uDC9D\uDC9E\uDCA0-\uDCA3]|\uD834[\uDD65-\uDD69\uDD6D-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD836[\uDE00-\uDE36\uDE3B-\uDE6C\uDE75\uDE84\uDE9B-\uDE9F\uDEA1-\uDEAF]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A]|\uD83A[\uDCD0-\uDCD6\uDD44-\uDD4A]|\uDB40[\uDC01\uDC20-\uDC7F\uDD00-\uDDEF]")
checkLNPRegex = re.compile("^(?:[~`!@#%^&*()\{\}\[\];:\"'<,.>?/\\|_+=-]|[a-zA-Z0-9\s])+$")

def checkLNP(str):
    return checkLNPRegex.match(str)

"""
Utility function to call 2 other functions which remove Combining Marks/Invisible characters
"""
def clean(str):
    str = re.sub(regexLineBreakCombiningMarks, '', str)
    # backreference must be \1 in Python; the JavaScript-style $1 would be inserted literally
    str = re.sub(regexSymbolWithCombiningMarks, r'\1', str)
    str = re.sub(r'[\u200B-\u200D\uFEFF\u2063]', '', str)
    return str
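A small demonstration of clean(), not part of the commit; it strips zero-width and combining characters that would otherwise survive into scene titles:

from util import clean, checkLNP

print(clean("Ne\u200bw vid\u0301eo"))        # zero-width space and combining acute removed
print(bool(checkLNP("plain ASCII title!")))  # True: nothing to normalize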