From 4e2f810f78fa3d0f8fdf6f068c9b016c303cd418 Mon Sep 17 00:00:00 2001 From: abnormalmaps Date: Mon, 3 Feb 2025 12:20:59 -0500 Subject: [PATCH] Add --dump=linking-info flag Adds the --dump=linking-info flag, which dumps the classes, selectors, and functions required by the binary, along with info about each symbol, as JSON to the dump file. Change-Id: I87e6db91fc68aecaa8dd3c516676aa81c2c34209 --- .gitignore | 4 + OPTIONS_HELP.txt | 9 +++ dev-docs/debugging.md | 5 ++ dev-scripts/log-unimplemented.sh | 16 ++++ src/dyld.rs | 61 +++++++++++++++ src/environment.rs | 17 +++++ src/objc/classes.rs | 75 ++++++++++++++++-- src/objc/selectors.rs | 127 ++++++++++++++++++++++++++++++- src/options.rs | 33 ++++++++ 9 files changed, 341 insertions(+), 6 deletions(-) create mode 100755 dev-scripts/log-unimplemented.sh diff --git a/.gitignore b/.gitignore index 15c755fb4..bef85c5ed 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,7 @@ local.properties android/app/src/main/jniLibs android/app/build android/build + +# Default developer dump file + +DUMP.txt diff --git a/OPTIONS_HELP.txt b/OPTIONS_HELP.txt index 55c554dde..845b60afd 100644 --- a/OPTIONS_HELP.txt +++ b/OPTIONS_HELP.txt @@ -166,6 +166,15 @@ Debugging options: Disables the graphical error message box (outputs to terminal/log file only). + --dump=... + Dumps some information about the app/emulator to the dump file. Check + src/options.rs:parse_dump_options for the currently available options. + + --dump-file=... + Pick the file for information to be dumped to, relative to the user + data directory. Defaults to {user directory}/DUMP.txt. Note that this + file will get truncated (deleted) if it already exists! + Other options: --preferred-languages=... Specifies a list of preferred languages to be reported to the app. 
diff --git a/dev-docs/debugging.md b/dev-docs/debugging.md index c8f6cb2cf..41e6b6e26 100644 --- a/dev-docs/debugging.md +++ b/dev-docs/debugging.md @@ -19,6 +19,11 @@ The `RUST_BACKTRACE=1` environment variable is always helpful. You'll probably w touchHLE will print the basic registers (r0-r13, SP, LR, PC) and a basic stack trace (using frame pointers) for the current thread when a panic occurs. To make sense of the result, you will probably want to open the app binary in Ghidra or another reverse-engineering tool. +## Dumping classes/selectors/function symbols from binaries +The `--dump=linking-info` flag dumps information about the classes, selectors, and lazy symbols (functions) that are requested by the binary, and how touchHLE is handling them. This is output to the file specified by `--dump-file=` (which defaults to `DUMP.txt` in the user data directory). +The most useful application for this is determining which classes/selectors/functions that (might) be needed by an application are not implemented by touchHLE. This can be checked with `dev-scripts/log-unimplemented.sh [name of app to check]` (make sure `jq` is installed!). +The schemas for the JSON are described in `ObjC::dump_classes` (in `src/objc/classes.rs`), `ObjC::dump_selectors` (in `src/objc/selectors.rs`), and `Dyld::dump_lazy_symbols` (in `src/dyld.rs`). + ### GDB Remote Serial Protocol server For more complex cases, you can use the `--gdb=` command-line argument to start touchHLE in debugging mode, where it will provide a GDB Remote Serial Protocol server. You can then connect to touchHLE with GDB. (In theory LLDB also should work, but it doesn't.) 
diff --git a/dev-scripts/log-unimplemented.sh b/dev-scripts/log-unimplemented.sh new file mode 100755 index 000000000..71a4cc0f0 --- /dev/null +++ b/dev-scripts/log-unimplemented.sh @@ -0,0 +1,16 @@ +#!/bin/sh + +# No set -e, since touchHLE can exit with an error + +if [ -z "$1" ] +then + echo 'Usage: ./log-unimplemented.sh [name of app to check]' + exit 1 +else + cargo run -- --dump=linking-info "$1" + cat DUMP.txt | jq --slurp '{ + "unimplemented_classes": ([.[] | select(.object == "classes") | .classes[] | select(.class_type == "unimplemented") | .name] | sort), + "unused_selectors": ([.[] | select(.object == "selectors") | .selectors[] | select(.instance_implementations or .class_implementations | not) | .selector] | sort), + "unlinked_symbols": ([.[] | select(.object == "lazy_symbols") | .symbols[] | select(.linked_to | not) | .symbol] | sort), + }' +fi diff --git a/src/dyld.rs b/src/dyld.rs index db48c6356..0d94919e2 100644 --- a/src/dyld.rs +++ b/src/dyld.rs @@ -238,6 +238,67 @@ impl Dyld { ns_string::register_constant_strings(&bins[0], mem, objc); } + /// Dumps all lazy symbols (functions) referenced by the binary + /// as JSON to `file`. + /// + /// The JSON has the following form: + /// ```json + /// { + /// "object": "lazy_symbols", + /// "symbols": [ + /// { + /// "symbol": ((name of symbol)), + /// "linked_to": "host" | "dylib" | null, + /// "dylib": ((name of dylib)) | null, + /// }, + /// ... + /// ] + /// } + /// ``` + pub fn dump_lazy_symbols( + &mut self, + bins: &[MachO], + file: &mut std::fs::File, + ) -> Result<(), std::io::Error> { + use std::io::Write; + // Guest binary is always bin 0. + let stubs = bins[0].get_section(SectionType::SymbolStubs).unwrap(); + let info = stubs.dyld_indirect_symbol_info.as_ref().unwrap(); + writeln!( + file, + "{{\n \"object\":\"lazy_symbols\",\n \"symbols\": [" + )?; + + 'sym: for (i, symbol) in info.indirect_undef_symbols.iter().enumerate() { + // Why doesn't json allow trailing commas... 
+ let comma = if i == info.indirect_undef_symbols.len() - 1 { + "" + } else { + "," + }; + let symbol = symbol.as_ref().unwrap(); + if let Some(&(_, _)) = search_lists(function_lists::FUNCTION_LISTS, symbol) { + writeln!( + file, + " {{ \"symbol\": \"{symbol}\", \"linked_to\": \"host\"}}{comma}" + )?; + continue; + } + for dylib in bins.iter() { + if dylib.exported_symbols.contains_key(symbol) { + writeln!( + file, + " {{ \"symbol\": \"{}\", \"linked_to\": \"dylib\", \"dylib\": \"{}\"}}{}", + symbol, dylib.name, comma + )?; + continue 'sym; + } + } + writeln!(file, " {{ \"symbol\": \"{symbol}\" }}{comma}")?; + } + writeln!(file, " ]\n}}") + } + /// [Self::do_initial_linking] but for when this is the app picker's special /// environment with no binary (see [crate::Environment::new_without_app]). pub fn do_initial_linking_with_no_bins(&mut self, mem: &mut Mem, objc: &mut ObjC) { diff --git a/src/environment.rs b/src/environment.rs index 77b1166f0..c5b407488 100644 --- a/src/environment.rs +++ b/src/environment.rs @@ -111,6 +111,7 @@ pub struct Environment { /// Set to [true] when created using [Environment::new_without_app]. /// In practice, this means we are in the app picker. pub is_fake: bool, + pub dump_file: Option, } /// What to do next when executing this thread. 
@@ -394,8 +395,14 @@ impl Environment { gdb_server: None, env_vars: Default::default(), is_fake: false, + dump_file: None, }; + if env.options.dumping_options.any() { + env.dump_file = + Some(std::fs::File::create(&env.options.dumping_file).map_err(|e| e.to_string())?); + } + env.set_up_initial_env_vars(); dyld::Dyld::do_late_linking(&mut env); @@ -473,6 +480,15 @@ impl Environment { log_dbg!("Static initialization done"); } + if env.options.dumping_options.linking_info { + let file = env.dump_file.as_mut().unwrap(); + env.objc.dump_classes(file).unwrap(); + env.dyld.dump_lazy_symbols(&env.bins, file).unwrap(); + env.objc + .dump_selectors(&env.bins[0], &env.mem, file) + .unwrap(); + } + env.cpu.branch(entry_point_addr); Ok(env) } @@ -555,6 +571,7 @@ impl Environment { gdb_server: None, env_vars: Default::default(), is_fake: true, + dump_file: None, }; env.set_up_initial_env_vars(); diff --git a/src/objc/classes.rs b/src/objc/classes.rs index 64503a669..9ad410a2b 100644 --- a/src/objc/classes.rs +++ b/src/objc/classes.rs @@ -776,6 +776,66 @@ impl ObjC { (need, diff) } + /// Dumps all classes available to the emulator as JSON to `file`. + /// + /// The JSON has the following form: + /// ```json + /// { + /// "object": "classes", + /// "classes": [ + /// { + /// "name": ((name of class)), + /// "super": ((name of superclass, if available)), + /// "class_type": (("normal" | "unimplemented" | "fake")) + /// }, + /// ... + /// ] + /// } + /// ``` + pub fn dump_classes(&self, file: &mut std::fs::File) -> Result<(), std::io::Error> { + use std::io::Write; + writeln!(file, "{{\n \"object\": \"classes\",\n \"classes\": [")?; + for (i, (_, o)) in self.classes.iter().enumerate() { + // Why doesn't json allow trailing commas... + let comma = if i == self.classes.len() - 1 { "" } else { "," }; + + let host_obj = self.get_host_object(*o).unwrap(); + + if let Some(ClassHostObject { + name, + superclass: sup, + .. 
+ }) = host_obj.as_any().downcast_ref() + { + if *sup == nil { + writeln!( + file, + " {{ \"name\": \"{name}\", \"class_type\": \"normal\" }}{comma}" + )?; + } else { + writeln!( + file, + " {{ \"name\": \"{}\", \"super\": \"{}\", \"class_type\": \"normal\" }}{}", + name, self.get_class_name(*sup), comma + )?; + } + } else if let Some(UnimplementedClass { name, .. }) = host_obj.as_any().downcast_ref() { + writeln!( + file, + " {{ \"name\": \"{name}\", \"class_type\": \"unimplemented\" }}{comma}" + )?; + } else if let Some(FakeClass { name, .. }) = host_obj.as_any().downcast_ref() { + writeln!( + file, + " {{ \"name\": \"{name}\", \"class_type\": \"fake\" }}{comma}" + )?; + } else { + panic!("Unrecognized class type!"); + } + } + writeln!(file, " ]\n}}") + } + /// For use by [crate::dyld]: register all the categories from the /// application binary. pub fn register_bin_categories(&mut self, bin: &MachO, mem: &mut Mem) { @@ -864,15 +924,20 @@ impl ObjC { } pub fn get_class_name(&self, class: Class) -> &str { - let host_object = self.get_host_object(class).unwrap(); + self.try_get_class_name(class) + .expect("Could not get class name!") + } + + pub fn try_get_class_name(&self, class: Class) -> Option<&str> { + let host_object = self.get_host_object(class)?; if let Some(ClassHostObject { name, .. }) = host_object.as_any().downcast_ref() { - name + Some(name) } else if let Some(UnimplementedClass { name, .. }) = host_object.as_any().downcast_ref() { - name + Some(name) } else if let Some(FakeClass { name, .. }) = host_object.as_any().downcast_ref() { - name + Some(name) } else { - panic!(); + None } } } diff --git a/src/objc/selectors.rs b/src/objc/selectors.rs index 36c3a7f37..e149f9b8e 100644 --- a/src/objc/selectors.rs +++ b/src/objc/selectors.rs @@ -14,10 +14,12 @@ //! Resources: //! 
- Apple's [The Objective-C Programming Language](https://developer.apple.com/library/archive/documentation/Cocoa/Conceptual/ObjectiveC/Chapters/ocSelectors.html) +use std::collections::HashMap; + use super::ObjC; use crate::abi::{GuestArg, GuestRet}; use crate::mach_o::MachO; -use crate::mem::{ConstPtr, Mem, MutPtr, Ptr}; +use crate::mem::{ConstPtr, Mem, MutPtr, Ptr, SafeRead}; use crate::Environment; /// Create a string literal for a selector from Objective-C message syntax @@ -67,6 +69,8 @@ impl SEL { } } +unsafe impl SafeRead for SEL {} + impl ObjC { pub fn lookup_selector(&self, name: &str) -> Option { self.selectors.get(name).copied() @@ -138,6 +142,127 @@ impl ObjC { mem.write(selref, sel.0); } } + + /// Dumps all selectors referenced by the binary as JSON to `file`. + /// + /// The JSON has the following form: + /// ```json + /// { + /// "object": "selectors", + /// "selectors": [ + /// { + /// "selector": ((name of selector)), + /// "instance_implementations": [ ((names of classes)) ] | null, + /// "class_implementations": [ ((names of classes)) ] | null, + /// }, + /// ... + /// ], + /// } + /// ``` + pub fn dump_selectors( + &self, + bin: &MachO, + mem: &Mem, + file: &mut std::fs::File, + ) -> Result<(), std::io::Error> { + use std::io::Write; + let Some(selrefs) = bin.get_section("__objc_selrefs") else { + writeln!(file, "{{ \"object\": \"selectors\", \"selectors\": [] }}")?; + log!("No selectors in binary!"); + return Ok(()); + }; + assert!(selrefs.size % 4 == 0); + // We manually gather selectors from the binary since it represents + // the selectors actually used, whereas using self.selectors + // would include all host selectors. + let base: ConstPtr = Ptr::from_bits(selrefs.addr); + let bin_sels: Vec = (0..(selrefs.size / 4)) + .map(|i| mem.read(base + i)) + .collect(); + + // Gather all selectors in all linked classes. The first vector is for + // instance methods, the second is for class methods. 
+ let mut impl_selectors: HashMap, Vec<&str>)> = HashMap::new(); + for class in self.classes.values() { + let class_host_object = self.get_host_object(*class).unwrap(); + let Some(super::ClassHostObject { name, methods, .. }) = + class_host_object.as_any().downcast_ref() + else { + continue; + }; + for sel in methods.keys() { + let entry = impl_selectors.entry(*sel); + entry.or_default().0.push(name.as_str()); + } + let metaclass = Self::read_isa(*class, mem); + // Also get class methods: + let metaclass_host_object = self.get_host_object(metaclass).unwrap(); + let super::ClassHostObject { methods, .. } = + metaclass_host_object.as_any().downcast_ref().unwrap(); + for sel in methods.keys() { + let entry = impl_selectors.entry(*sel); + entry.or_default().1.push(name.as_str()); + } + } + + // Also check unlinked host classes: just because the binary doesn't + // link them in directly doesn't mean that it won't use it! + for &class_list in super::CLASS_LISTS { + for (class_name, template) in class_list { + if self.classes.contains_key(*class_name) { + continue; + } + + for &(sel_name, _) in template.instance_methods { + let sel = self.lookup_selector(sel_name).unwrap(); + let entry = impl_selectors.entry(sel); + entry.or_default().0.push(class_name); + } + + for &(sel_name, _) in template.class_methods { + let sel = self.lookup_selector(sel_name).unwrap(); + let entry = impl_selectors.entry(sel); + entry.or_default().1.push(class_name); + } + } + } + + write!( + file, + "{{\n \"object\": \"selectors\",\n \"selectors\": [ " + )?; + for (i, sel) in bin_sels.iter().enumerate() { + // Why doesn't json allow trailing commas... 
+ let comma = if i == bin_sels.len() - 1 { "" } else { "," }; + + let name = sel.as_str(mem); + write!(file, " {{ \"selector\": \"{name}\"")?; + if let Some((instance_impls, class_impls)) = impl_selectors.get(sel) { + if !instance_impls.is_empty() { + write!(file, ", \"instance_implementations\": [ ")?; + for (j, class) in instance_impls.iter().enumerate() { + let comma = if j == instance_impls.len() - 1 { + "" + } else { + "," + }; + write!(file, "\"{class}\"{comma} ")?; + } + write!(file, "]")?; + } + if !class_impls.is_empty() { + write!(file, ", \"class_implementations\": [ ")?; + for (j, class) in class_impls.iter().enumerate() { + let comma = if j == class_impls.len() - 1 { "" } else { "," }; + write!(file, "\"{class}\"{comma} ")?; + } + write!(file, "]")?; + } + } + writeln!(file, "}}{comma}")?; + } + write!(file, " ]\n}}") + } } /// Standard Objective-C runtime function for selector registration. diff --git a/src/options.rs b/src/options.rs index 6fdbee42b..0733971cd 100644 --- a/src/options.rs +++ b/src/options.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use std::io::{BufRead, BufReader, Read}; use std::net::{SocketAddr, ToSocketAddrs}; use std::num::NonZeroU32; +use std::path::PathBuf; pub const OPTIONS_HELP: &str = include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/OPTIONS_HELP.txt")); @@ -54,6 +55,8 @@ pub struct Options { pub force_composition: bool, pub network_access: bool, pub popup_errors: bool, + pub dumping_options: DumpingOptions, + pub dumping_file: PathBuf, } impl Default for Options { @@ -80,6 +83,8 @@ impl Default for Options { force_composition: false, network_access: false, popup_errors: true, + dumping_options: Default::default(), + dumping_file: crate::paths::user_data_base_path().join("DUMP.txt"), } } } @@ -205,6 +210,10 @@ impl Options { self.network_access = true; } else if arg == "--no-error-popup" { self.popup_errors = false; + } else if let Some(values) = arg.strip_prefix("--dump=") { + self.dumping_options = 
parse_dump_options(values)?; + } else if let Some(path) = arg.strip_prefix("--dump-file=") { + self.dumping_file = crate::paths::user_data_base_path().join(path); } else { return Ok(false); }; @@ -254,3 +263,27 @@ pub fn get_options_from_file(file: F, app_id: &str) -> Result bool { + self.linking_info + } +} + +fn parse_dump_options(options: &str) -> Result { + let mut dumping_options = DumpingOptions::default(); + for opt in options.split(",") { + if opt == "linking-info" { + dumping_options.linking_info = true; + } else { + return Err(format!("Unrecognized option {opt} for --dump=...")); + } + } + Ok(dumping_options) +}