Files
OF-Scraper/ofscraper/classes/of/base.py
T
2025-10-16 11:03:43 -05:00

49 lines
1.4 KiB
Python
Executable File

import importlib
import importlib.util
import re

from bs4 import BeautifulSoup

import ofscraper.utils.settings as settings
# Parser name handed to BeautifulSoup: use lxml when it is importable,
# otherwise fall back to "html.parser".
# `importlib.util` must be imported explicitly; `import importlib` alone does
# not guarantee that the `util` submodule is loaded.
html_parser = (
    "lxml" if importlib.util.find_spec("lxml") is not None else "html.parser"
)
class base:
    """Base class bundling text clean-up helpers.

    The helpers truncate text, make it usable inside a file name, and strip
    markup from text. Truncation length/mode and the space replacement
    string are read from ``settings.get_settings()`` on every call.
    """

    def __init__(self):
        """Create the helper; there is no instance state to set up."""

    def text_trunicate(self, text) -> str:
        """Shorten ``text`` according to the configured length and mode.

        Args:
            text: Value to shorten. Any object is accepted and converted
                with ``str()``; ``None`` is handled separately.

        Returns:
            ``"None"`` when ``text`` is ``None``; the text unchanged when it
            is empty or the configured ``text_length`` is ``0``; otherwise
            the truncated text.
        """
        # The None check must come before str(): str(None) is the string
        # "None", which never satisfies `is None`.
        if text is None:
            return "None"
        text = str(text)
        if not text:
            return text

        config = settings.get_settings()
        length = int(config.text_length)
        if length == 0:
            # A length of 0 disables truncation.
            return text
        if config.text_type == "letter":
            return text[:length]

        # Word mode: split on single spaces while keeping each space as its
        # own token (capturing group), drop the empty tokens produced by
        # consecutive spaces, and keep the first ``length + 1`` tokens.
        # NOTE(review): words and spaces both count as tokens, so the number
        # of words kept is not exactly ``length`` -- confirm this is intended
        # before changing it, since it affects generated names.
        tokens = [token for token in re.split("( )", text) if token]
        shortened = "".join(tokens[: length + 1])
        # Drop spaces left dangling at the end by the cut.
        return re.sub(r" +$", "", shortened)

    def file_cleanup(self, text) -> str:
        """Return ``text`` with markup and file-name-hostile characters removed.

        Args:
            text: Value to clean; converted with ``str()`` first.

        Returns:
            The text with tags removed, the characters
            ``\\n < > : " / | ? * ;`` removed, runs of ``-`` turned into a
            single ``_``, runs of spaces collapsed, and every remaining space
            replaced by the configured ``space_replacer``.
        """
        text = str(text)
        # Strip anything that looks like an HTML/XML tag.
        text = re.sub(r"<[^>]*>", "", text)
        # Raw string: the previous non-raw literal contained the invalid
        # escape sequence "\|". The set of removed characters is unchanged.
        text = re.sub(r'[\n<>:"/|?*;]+', "", text)
        text = re.sub(r"-+", "_", text)
        text = re.sub(r" +", " ", text)
        return re.sub(" ", settings.get_settings().space_replacer, text)

    def db_cleanup(self, string) -> str:
        """Return ``string`` as plain text with normalised whitespace.

        Args:
            string: Text to clean. Falsy values (``None``, ``""``) are
                treated as the empty string.

        Returns:
            The text with tags removed, every run of whitespace collapsed to
            a single space, leading/trailing whitespace dropped, and the
            remainder passed through ``BeautifulSoup(...).get_text()``.
        """
        string = string or ""
        string = re.sub(r"<[^>]*>", "", string)
        string = " ".join(string.split())
        if not string:
            # Nothing left to parse; skip building a parser for empty input.
            return string
        return BeautifulSoup(string, html_parser).get_text()