Match Functions with Same Name in dups (#1449)

The duplicates report has several false negatives when a function has
the same name as another function which has not been decompiled. This
affects overlays which share many of the same function names (e.g.
`EntityWeaponAttack`).

The duplicates tool now parses the `INCLUDE_ASM` macro to extract the
path and ASM file. It then uses that when determining whether or not
each ASM file is actually included or not. Previously, only the function
name was checked, so if any `INCLUDE_ASM` file had the target function
name, it would be marked as not decompiled.

Before:

% | Decomp? | Name | Asm Path

-----|-------|-------------------------------|--------------------------------
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_008/EntityWeaponAttack.s
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_009/EntityWeaponAttack.s
| 0.91 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_010/EntityWeaponAttack.s
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_011/EntityWeaponAttack.s
| 0.98 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_025/EntityWeaponAttack.s
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_026/EntityWeaponAttack.s
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_027/EntityWeaponAttack.s
| 1.00 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_028/EntityWeaponAttack.s
| 0.97 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_058/EntityWeaponAttack.s

After:

% | Decomp? | Name | Asm Path

-----|-------|-------------------------------|--------------------------------
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_008/EntityWeaponAttack.s
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_009/EntityWeaponAttack.s
0.91 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_010/EntityWeaponAttack.s
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_011/EntityWeaponAttack.s
0.98 | false | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_025/EntityWeaponAttack.s
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_026/EntityWeaponAttack.s
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_027/EntityWeaponAttack.s
1.00 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_028/EntityWeaponAttack.s
0.97 | true | EntityWeaponAttack |
../../asm/us/weapon/nonmatchings/w_058/EntityWeaponAttack.s

(note: `w_008`, `w_009`, and `w_011` are decompiled in my workspace, but
not in GH)

Co-authored-by: Jonathan Hohle <jon@ttkb.co>
This commit is contained in:
Jonathan Hohle 2024-07-28 08:48:41 -07:00 committed by GitHub
parent 93c51db4fd
commit 85cbb974cd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 3 deletions

45
tools/dups/Cargo.lock generated
View File

@ -2,6 +2,15 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.3.2"
@ -118,6 +127,7 @@ name = "dups"
version = "0.1.0"
dependencies = [
"clap",
"regex",
]
[[package]]
@ -176,6 +186,12 @@ version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "once_cell"
version = "1.18.0"
@ -200,6 +216,35 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "rustix"
version = "0.38.7"

View File

@ -7,4 +7,5 @@ edition = "2021"
[dependencies]
clap = { version = "4.3.19", features = ["derive"] }
regex = "1.10.5"

View File

@ -1,3 +1,4 @@
use regex::Regex;
use std::env::*;
use std::fs;
use std::fs::File;
@ -151,11 +152,13 @@ struct Args {
pub struct IncludeAsmEntry {
pub line: String,
pub path: String,
pub asm_path: String,
}
fn process_directory_for_include_asm(dir: &str) -> Vec<IncludeAsmEntry> {
let entries = std::fs::read_dir(dir).expect("Unable to read directory");
let re = Regex::new("INCLUDE_ASM\\(\"([^\"]*)\", ([^)]*)\\)").unwrap();
let mut output = Vec::new();
entries.for_each(|entry| {
@ -170,9 +173,12 @@ fn process_directory_for_include_asm(dir: &str) -> Vec<IncludeAsmEntry> {
let line_str = line.unwrap();
if line_str.contains("INCLUDE_ASM") {
let (full, [asm_dir, asm_file]) = re.captures(&line_str).unwrap().extract();
output.push(IncludeAsmEntry {
line: line_str.clone(),
path: item_path.to_string_lossy().to_string(),
asm_path: format!("../../asm/us/{}/{}.s", asm_dir, asm_file),
});
}
}
@ -304,7 +310,7 @@ fn do_dups_report(output_file: Option<String>, threshold: f64) {
SrcAsmPair {
asm_dir: String::from("../../asm/us/weapon/nonmatchings/"),
src_dir: String::from("../../src/weapon/"),
overlay_name: String::from("WEAPON"),
overlay_name: String::from("WEAPON0"),
include_asm: get_all_include_asm("../../src/weapon/"),
path_matcher: "/weapon/".to_string(),
},
@ -386,7 +392,7 @@ fn do_dups_report(output_file: Option<String>, threshold: f64) {
for pair in &pairs.clone() {
if function.file.contains(&pair.path_matcher) {
for inc in &pair.include_asm {
if inc.line.contains(&function.name) {
if function.file == inc.asm_path && inc.line.contains(&function.name) {
decompiled = false;
}
}

View File

@ -1,2 +1,25 @@
test:
python3 tools/symbols_test.py
$(PYTHON) tools/symbols_test.py
function-finder:
# TODO: make sure graphviz is installed
$(MAKE) force_symbols
$(MAKE) force_extract
$(PYTHON) tools/analyze_calls.py
git clean -fdx asm/
git checkout config/
rm -f build/us/main.ld
rm -rf build/us/weapon.ld
$(MAKE) -j extract
$(PYTHON) tools/function_finder/function_finder_psx.py --use-call-trees > gh-duplicates/functions.md
rm -rf gh-duplicates/function_calls || true
mv function_calls gh-duplicates/
mv function_graphs.md gh-duplicates/
duplicates-report:
$(MAKE) force_symbols
$(MAKE) force_extract
cd tools/dups; \
cargo run --release -- \
--threshold .90 \
--output-file ../../gh-duplicates/duplicates.txt