Function finder fixes (#1466)

Currently function_finder misses functions because we don't continue
fetching when there is another page of results. However, just increasing
the page size makes the script very slow.

This adds caches for the zip-file download and result fetching to get the
script running at a reasonable speed for `weapon`. We also fetch until
`next` is null and increase the page size so that all the results are
fetched. This now runs in about 12 minutes on my system. Results look
like this:
https://gist.github.com/sozud/69aeafcc671d6354da474db952e8afef
sozud 2024-08-02 12:28:12 -07:00 committed by GitHub
parent 50f5ad9dca
commit 74560d8545

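For reference, here is a minimal standalone sketch of the approach described above (fetch pages until `next` is null and memoize the combined results). The function and cache names are illustrative rather than the ones used in function_finder; the `results`/`next` response shape matches what the diff below relies on.

import requests

# Hypothetical page cache, keyed by the starting URL of a search.
_page_cache = {}

def fetch_all_pages(start_url):
    # Return cached results for a URL we have already walked.
    if start_url in _page_cache:
        return _page_cache[start_url]

    results = []
    url = start_url
    while url:
        data = requests.get(url).json()
        results.extend(data.get('results', []))
        # 'next' is null (None) on the last page, which ends the loop.
        url = data.get('next')

    # Cache under the starting URL so repeated searches skip the network.
    _page_cache[start_url] = results
    return results

# Example: fetch every scratch matching a search term with a larger page size.
# scratches = fetch_all_pages('https://decomp.me/api/scratch?search=weapon&page_size=100')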

@@ -10,7 +10,11 @@ def are_strings_similar(str1, str2, threshold=0.8):
    similarity = difflib.SequenceMatcher(None, str1, str2).ratio()
    return similarity >= threshold

zip_cache = {}

def get_asm(slug):
    if slug in zip_cache:
        return zip_cache[slug]
    url = f'https://decomp.me/api/scratch/{slug}/export'
    response = requests.get(url)
    if response.status_code == 200:
@@ -19,6 +23,7 @@ def get_asm(slug):
            if 'target.s' in zip_contents:
                with the_zip.open('target.s') as file:
                    target_content = file.read().decode('utf-8')
                    zip_cache[slug] = target_content
                    return target_content
            else:
                print("target.s not found in the zip file")
@@ -26,22 +31,32 @@ def get_asm(slug):
        print(f"Failed to download the zip file: Status code {response.status_code}")
        return None

result_cache = {}

def fetch_all_results(url):
    if url in result_cache:
        return result_cache[url]
    results = []
    while url:
        response = requests.get(url)
        data = response.json()
        results.extend(data.get('results', []))
        url = data.get('next')
    result_cache[url] = results
    return results

def find_scratches(name, platform, local_asm=None, use_local=False):
    try:
        response = requests.get(f"https://decomp.me/api/scratch?search={name}")
        response.raise_for_status()
        scratches = json.loads(response.text)
    except requests.exceptions.HTTPError as http_err:
        print(f"\033[91mfind_scratches HTTP error: {http_err}", file=sys.stderr)
        return None
    except Exception as err:
        print(f"\033[91mfind_scratches exception: {err}", file=sys.stderr)
        return None
    results = fetch_all_results(f"https://decomp.me/api/scratch?search={name}&page_size=100")
    best_result = None
    best_percent = 0
    for result in scratches["results"]:
    for result in results:
        if not "name" in result:
            continue
        if not result["name"].startswith(name):