Function finder fixes (#1466)

Currently function_finder misses functions because we don't continue
fetching when there is another page of results. However, just increasing
the page size makes the script very slow.

This adds caches for the zip-file download and result fetching to get the
script running at a reasonable speed for `weapon`. We also fetch until
`next` is null and increase the page size so that all the results are
fetched. This now runs in about 12 minutes on my system. Results look
like this:
https://gist.github.com/sozud/69aeafcc671d6354da474db952e8afef
sozud 2024-08-02 12:28:12 -07:00 committed by GitHub
parent 50f5ad9dca
commit 74560d8545

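For reference, here is a minimal standalone sketch of the approach described above (fetch pages until `next` is null and memoize the combined results). The function and cache names are illustrative rather than the ones used in function_finder; the `results`/`next` response shape matches what the diff below relies on.

import requests

# Hypothetical page cache, keyed by the starting URL of a search.
_page_cache = {}

def fetch_all_pages(start_url):
    # Return cached results for a URL we have already walked.
    if start_url in _page_cache:
        return _page_cache[start_url]

    results = []
    url = start_url
    while url:
        data = requests.get(url).json()
        results.extend(data.get('results', []))
        # 'next' is null (None) on the last page, which ends the loop.
        url = data.get('next')

    # Cache under the starting URL so repeated searches skip the network.
    _page_cache[start_url] = results
    return results

# Example: fetch every scratch matching a search term with a larger page size.
# scratches = fetch_all_pages('https://decomp.me/api/scratch?search=weapon&page_size=100')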

@@ -10,7 +10,11 @@ def are_strings_similar(str1, str2, threshold=0.8):
    similarity = difflib.SequenceMatcher(None, str1, str2).ratio()
    return similarity >= threshold

zip_cache = {}

def get_asm(slug):
    if slug in zip_cache:
        return zip_cache[slug]
    url = f'https://decomp.me/api/scratch/{slug}/export'
    response = requests.get(url)
    if response.status_code == 200:
@@ -19,6 +23,7 @@ def get_asm(slug):
            if 'target.s' in zip_contents:
                with the_zip.open('target.s') as file:
                    target_content = file.read().decode('utf-8')
                    zip_cache[slug] = target_content
                    return target_content
            else:
                print("target.s not found in the zip file")
@@ -26,22 +31,32 @@ def get_asm(slug):
        print(f"Failed to download the zip file: Status code {response.status_code}")
        return None

result_cache = {}

def fetch_all_results(url):
    if url in result_cache:
        return result_cache[url]
    results = []
    while url:
        response = requests.get(url)
        data = response.json()
        results.extend(data.get('results', []))
        url = data.get('next')
    result_cache[url] = results
    return results

def find_scratches(name, platform, local_asm=None, use_local=False):
    try:
        response = requests.get(f"https://decomp.me/api/scratch?search={name}")
        response.raise_for_status()
        scratches = json.loads(response.text)
    except requests.exceptions.HTTPError as http_err:
        print(f"\033[91mfind_scratches HTTP error: {http_err}", file=sys.stderr)
        return None
    except Exception as err:
        print(f"\033[91mfind_scratches exception: {err}", file=sys.stderr)
        return None
    results = fetch_all_results(f"https://decomp.me/api/scratch?search={name}&page_size=100")
    best_result = None
    best_percent = 0
    for result in scratches["results"]:
    for result in results:
        if not "name" in result:
            continue
        if not result["name"].startswith(name):