Fixed search failure due to unexpected parser state
Some checks failed: the CI / Check (push) jobs for Python 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13-dev have been cancelled.

ducalex 2024-10-01 18:05:38 -04:00 committed by GitHub
parent 93635981e8
commit 40d7c52d6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 34 additions and 67 deletions
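
All five diffs below apply the same fix: instead of reusing a single HTML parser across result pages and guessing when to stop from the page size or a parsed result count, each plugin's search() now builds a fresh parser for every page, feeds and closes it, and stops as soon as a page yields fewer than a full page of items. The following is a minimal, self-contained sketch of that loop; ExampleParser, fetch_page and FULL_PAGE are placeholder names invented for this illustration, not the plugins' real helpers (those use retrieve_url, nested parser classes and cut-offs of 15-20 items).

from html.parser import HTMLParser

FULL_PAGE = 20  # items on a full results page (the real plugins use cut-offs of 15-20)

class ExampleParser(HTMLParser):
    """Placeholder for the plugins' nested parser classes; counts result rows."""
    def __init__(self):
        super().__init__()
        self.page_items = 0

    def handle_starttag(self, tag, attrs):
        if tag == "tr":  # treat every table row as one result
            self.page_items += 1

def fetch_page(page):
    """Placeholder for retrieve_url(): pages 1-2 are full, page 3 is short."""
    rows = FULL_PAGE if page <= 2 else 5
    return "<table>" + "<tr><td>result</td></tr>" * rows + "</table>"

def search(max_pages=4):
    for page in range(1, max_pages + 1):
        parser = ExampleParser()      # fresh parser per page, so no state leaks between pages
        parser.feed(fetch_page(page))
        parser.close()
        if parser.page_items < FULL_PAGE:
            break                     # a short page means there are no further results

if __name__ == "__main__":
    search()

The early exit mirrors the page_items < 20, totalResults < 15 and len(parser.pageRes) < 20 checks added in the diffs: a short page means the site has no further results, so no extra request is made.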

limetorrents.py

@@ -1,4 +1,4 @@
-#VERSION: 4.8
+#VERSION: 4.9
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
@@ -38,7 +38,7 @@ class limetorrents(object):
             HTMLParser.__init__(self)
             self.url = url
             self.current_item = {} # dict for found item
-            self.page_empty = 22000
+            self.page_items = 0
             self.inside_table = False
             self.inside_tr = False
             self.column_index = -1
@@ -112,6 +112,7 @@ class limetorrents(object):
                 self.column_name = None
                 if "link" in self.current_item:
                     prettyPrinter(self.current_item)
+                    self.page_items += 1

     def download_torrent(self, info):
         # since limetorrents provides torrent links in itorrent (cloudflare protected),
@@ -128,14 +129,11 @@ class limetorrents(object):
         query = query.replace("%20", "-")
         category = self.supported_categories[cat]
-        parser = self.MyHtmlParser(self.url)
-        page = 1
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
+        for page in range(1, 5):
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
             html = retrieve_url(page_url)
-            lunghezza_html = len(html)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
+            parser = self.MyHtmlParser(self.url)
             parser.feed(html)
-            page += 1
-        parser.close()
+            parser.close()
+            if parser.page_items < 20:
+                break

solidtorrents.py

@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@ from helpers import retrieve_url
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math

 class solidtorrents(object):
@@ -47,8 +46,6 @@ class solidtorrents(object):
             self.parseDate = False
             self.column = 0
             self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
             self.totalResults = 0
             self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ class solidtorrents(object):
         def handle_starttag(self, tag, attrs):
             params = dict(attrs)
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
             if 'search-result' in params.get('class', ''):
                 self.foundResult = True
                 return
@@ -115,13 +105,10 @@ class solidtorrents(object):
                 prettyPrinter(self.torrent_info)
                 self.torrentReady = False
                 self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1

         def handle_data(self, data):
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
             if (self.parseTitle):
                 if (bool(data.strip()) and data != '\n'):
                     self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ class solidtorrents(object):
     def search(self, what, cat='all'):
         category = self.supported_categories[cat]
-        parser = self.TorrentInfoParser(self.url)
-        parser.feed(self.request(what, category, 1))
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-        for page in range(2, totalPages + 1):
+        for page in range(1, 5):
+            parser = self.TorrentInfoParser(self.url)
             parser.feed(self.request(what, category, page))
-        parser.close()
+            parser.close()
+            if parser.totalResults < 15:
+                break

torlock.py

@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman (custparasite@gmx.se)
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)

-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
@@ -35,6 +34,7 @@ class torlock(object):
             self.item_bad = False # set to True for malicious links
             self.current_item = None # dict for found item
             self.item_name = None # key's name in current_item dict
+            self.page_items = 0
             self.parser_class = {"td": "pub_date",
                                  "ts": "size",
                                  "tul": "seeds",
@@ -91,26 +91,19 @@ class torlock(object):
                 except Exception:
                     self.current_item["pub_date"] = -1
                 prettyPrinter(self.current_item)
+                self.page_items += 1
                 self.current_item = {}

     def search(self, query, cat='all'):
         """ Performs search """
         query = query.replace("%20", "-")
-        parser = self.MyHtmlParser(self.url)
-        page = "".join((self.url, "/", self.supported_categories[cat],
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
-        html = retrieve_url(page)
-        parser.feed(html)
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1] # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
+        category = self.supported_categories[cat]
+        for page in range(1, 5):
+            parser = self.MyHtmlParser(self.url)
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
+            html = retrieve_url(page_url)
             parser.feed(html)
-            counter += 1
-            if counter > 3:
+            parser.close()
+            if parser.page_items < 20:
                 break
-        parser.close()

torrentproject.py

@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci

 from helpers import retrieve_url
@@ -102,26 +102,18 @@ class torrentproject(object):
                 elif curr_key != 'name':
                     self.singleResData[curr_key] += data.strip()

-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1

     def search(self, what, cat='all'):
         # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
         what = what.replace('%20', '+')
         # analyze first 5 pages of results
         for currPage in range(0, 5):
             url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
             html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
             parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                 break
-            del parser.pageRes[:]
-        parser.close()

     def download_torrent(self, info):
         """ Downloader """

versions.txt

@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.8
+limetorrents: 4.9
 piratebay: 3.3
-solidtorrents: 2.3
-torlock: 2.23
-torrentproject: 1.4
+solidtorrents: 2.4
+torlock: 2.24
+torrentproject: 1.5
 torrentscsv: 1.4