Fixed search failure due to unexpected parser state
Some checks failed: the CI / Check (push) jobs for Python 3.7, 3.8, 3.9, 3.10, 3.11, 3.12 and 3.13-dev have been cancelled.

ducalex 2024-10-01 18:05:38 -04:00 committed by GitHub
parent 93635981e8
commit 40d7c52d6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 34 additions and 67 deletions
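
All five diffs below apply the same fix: instead of reusing a single HTML parser across result pages and guessing when to stop from the page size or a parsed result count, each plugin's search() now builds a fresh parser for every page, feeds and closes it, and stops as soon as a page yields fewer than a full page of items. The following is a minimal, self-contained sketch of that loop; ExampleParser, fetch_page and FULL_PAGE are placeholder names invented for this illustration, not the plugins' real helpers (those use retrieve_url, nested parser classes and cut-offs of 15-20 items).

from html.parser import HTMLParser

FULL_PAGE = 20  # items on a full results page (the real plugins use cut-offs of 15-20)

class ExampleParser(HTMLParser):
    """Placeholder for the plugins' nested parser classes; counts result rows."""
    def __init__(self):
        super().__init__()
        self.page_items = 0

    def handle_starttag(self, tag, attrs):
        if tag == "tr":  # treat every table row as one result
            self.page_items += 1

def fetch_page(page):
    """Placeholder for retrieve_url(): pages 1-2 are full, page 3 is short."""
    rows = FULL_PAGE if page <= 2 else 5
    return "<table>" + "<tr><td>result</td></tr>" * rows + "</table>"

def search(max_pages=4):
    for page in range(1, max_pages + 1):
        parser = ExampleParser()      # fresh parser per page, so no state leaks between pages
        parser.feed(fetch_page(page))
        parser.close()
        if parser.page_items < FULL_PAGE:
            break                     # a short page means there are no further results

if __name__ == "__main__":
    search()

The early exit mirrors the page_items < 20, totalResults < 15 and len(parser.pageRes) < 20 checks added in the diffs: a short page means the site has no further results, so no extra request is made.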

limetorrents.py

@@ -1,4 +1,4 @@
-#VERSION: 4.8
+#VERSION: 4.9
 # AUTHORS: Lima66
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)
@@ -38,7 +38,7 @@ class limetorrents(object):
             HTMLParser.__init__(self)
             self.url = url
             self.current_item = {} # dict for found item
-            self.page_empty = 22000
+            self.page_items = 0
             self.inside_table = False
             self.inside_tr = False
             self.column_index = -1
@@ -112,6 +112,7 @@ class limetorrents(object):
                 self.column_name = None
                 if "link" in self.current_item:
                     prettyPrinter(self.current_item)
+                    self.page_items += 1

     def download_torrent(self, info):
         # since limetorrents provides torrent links in itorrent (cloudflare protected),
@@ -128,14 +129,11 @@ class limetorrents(object):
         query = query.replace("%20", "-")
         category = self.supported_categories[cat]
-        parser = self.MyHtmlParser(self.url)
-        page = 1
-        while True:
-            page_url = "{0}/search/{1}/{2}/seeds/{3}/".format(self.url, category, query, page)
+        for page in range(1, 5):
+            page_url = f"{self.url}/search/{category}/{query}/seeds/{page}/"
             html = retrieve_url(page_url)
-            lunghezza_html = len(html)
-            if page > 6 or lunghezza_html <= parser.page_empty:
-                return
+            parser = self.MyHtmlParser(self.url)
             parser.feed(html)
-            page += 1
-        parser.close()
+            parser.close()
+            if parser.page_items < 20:
+                break

solidtorrents.py

@@ -1,4 +1,4 @@
-# VERSION: 2.3
+# VERSION: 2.4
 # AUTHORS: nKlido
 # LICENSING INFORMATION
@@ -24,7 +24,6 @@ from helpers import retrieve_url
 from novaprinter import prettyPrinter
 from html.parser import HTMLParser
 from datetime import datetime
-import math

 class solidtorrents(object):
@@ -47,8 +46,6 @@ class solidtorrents(object):
             self.parseDate = False
             self.column = 0
             self.torrentReady = False
-            self.foundSearchStats = False
-            self.parseTotalResults = False
             self.totalResults = 0
             self.torrent_info = self.empty_torrent_info()
@@ -68,13 +65,6 @@ class solidtorrents(object):
         def handle_starttag(self, tag, attrs):
             params = dict(attrs)
-            if 'search-stats' in params.get('class', ''):
-                self.foundSearchStats = True
-            if (self.foundSearchStats and tag == 'b'):
-                self.parseTotalResults = True
-                self.foundSearchStats = False
             if 'search-result' in params.get('class', ''):
                 self.foundResult = True
                 return
@@ -115,13 +105,10 @@ class solidtorrents(object):
                 prettyPrinter(self.torrent_info)
                 self.torrentReady = False
                 self.torrent_info = self.empty_torrent_info()
+                self.totalResults += 1

         def handle_data(self, data):
-            if (self.parseTotalResults):
-                self.totalResults = int(data.strip())
-                self.parseTotalResults = False
             if (self.parseTitle):
                 if (bool(data.strip()) and data != '\n'):
                     self.torrent_info['name'] = data
@@ -161,12 +148,9 @@ class solidtorrents(object):
     def search(self, what, cat='all'):
         category = self.supported_categories[cat]
-        parser = self.TorrentInfoParser(self.url)
-        parser.feed(self.request(what, category, 1))
-        totalPages = min(math.ceil(parser.totalResults / 20), 5)
-        for page in range(2, totalPages + 1):
+        for page in range(1, 5):
+            parser = self.TorrentInfoParser(self.url)
             parser.feed(self.request(what, category, page))
-        parser.close()
+            parser.close()
+            if parser.totalResults < 15:
+                break

torlock.py

@@ -1,8 +1,7 @@
-#VERSION: 2.23
+#VERSION: 2.24
 # AUTHORS: Douman (custparasite@gmx.se)
 # CONTRIBUTORS: Diego de las Heras (ngosang@hotmail.es)

-from re import compile as re_compile
 from html.parser import HTMLParser
 from datetime import datetime, timedelta
@@ -35,6 +34,7 @@ class torlock(object):
             self.item_bad = False # set to True for malicious links
             self.current_item = None # dict for found item
             self.item_name = None # key's name in current_item dict
+            self.page_items = 0
             self.parser_class = {"td": "pub_date",
                                  "ts": "size",
                                  "tul": "seeds",
@@ -91,26 +91,19 @@ class torlock(object):
                 except Exception:
                     self.current_item["pub_date"] = -1
                 prettyPrinter(self.current_item)
+                self.page_items += 1
                 self.current_item = {}

     def search(self, query, cat='all'):
         """ Performs search """
         query = query.replace("%20", "-")
-        parser = self.MyHtmlParser(self.url)
-        page = "".join((self.url, "/", self.supported_categories[cat],
-                        "/torrents/", query, ".html?sort=seeds&page=1"))
-        html = retrieve_url(page)
-        parser.feed(html)
-        counter = 1
-        additional_pages = re_compile(r"/{0}/torrents/{1}.html\?sort=seeds&page=[0-9]+"
-                                      .format(self.supported_categories[cat], query))
-        list_searches = additional_pages.findall(html)[:-1] # last link is next(i.e. second)
-        for page in map(lambda link: "".join((self.url, link)), list_searches):
-            html = retrieve_url(page)
+        category = self.supported_categories[cat]
+        for page in range(1, 5):
+            parser = self.MyHtmlParser(self.url)
+            page_url = f"{self.url}/{category}/torrents/{query}.html?sort=seeds&page={page}"
+            html = retrieve_url(page_url)
             parser.feed(html)
-            counter += 1
-            if counter > 3:
+            parser.close()
+            if parser.page_items < 20:
                 break
-        parser.close()

torrentproject.py

@@ -1,4 +1,4 @@
-#VERSION: 1.4
+#VERSION: 1.5
 #AUTHORS: mauricci

 from helpers import retrieve_url
@@ -102,26 +102,18 @@ class torrentproject(object):
                 elif curr_key != 'name':
                     self.singleResData[curr_key] += data.strip()

-        def feed(self, html):
-            HTMLParser.feed(self, html)
-            self.pageComplete = False
-            self.insideResults = False
-            self.insideDataDiv = False
-            self.spanCount = -1

     def search(self, what, cat='all'):
         # curr_cat = self.supported_categories[cat]
-        parser = self.MyHTMLParser(self.url)
         what = what.replace('%20', '+')
         # analyze first 5 pages of results
         for currPage in range(0, 5):
             url = self.url + '/browse?t={0}&p={1}'.format(what, currPage)
             html = retrieve_url(url)
+            parser = self.MyHTMLParser(self.url)
             parser.feed(html)
-            if len(parser.pageRes) <= 0:
+            parser.close()
+            if len(parser.pageRes) < 20:
                 break
-            del parser.pageRes[:]
-        parser.close()

     def download_torrent(self, info):
         """ Downloader """

versions.txt

@@ -1,8 +1,8 @@
 eztv: 1.16
 jackett: 4.0
-limetorrents: 4.8
+limetorrents: 4.9
 piratebay: 3.3
-solidtorrents: 2.3
-torlock: 2.23
-torrentproject: 1.4
+solidtorrents: 2.4
+torlock: 2.24
+torrentproject: 1.5
 torrentscsv: 1.4