mirror of
https://github.com/isledecomp/isle.git
synced 2024-12-03 11:00:42 +00:00
Data comparison tool (#618)
* Parse cvdump TYPES section. Add datacmp tool. * Corrections * Use static * Revert "Use static" This reverts commit e0a4324e00.
* Handle partially initialized variable * Shuffle order of legounksavedatawriter * Revert "Shuffle order of legounksavedatawriter" This reverts commit 506e06f117.
--------- Co-authored-by: Christian Semmler <mail@csemmler.com>
This commit is contained in:
parent
068760056a
commit
ec1fcce08c
@ -24,6 +24,9 @@ protected:
|
||||
// SYNTHETIC: CONFIG 0x00403cb0
|
||||
// CAboutDialog::`scalar deleting destructor'
|
||||
|
||||
// FUNCTION: CONFIG 0x00403d30
|
||||
// CAboutDialog::_GetBaseMessageMap
|
||||
|
||||
// FUNCTION: CONFIG 0x00403d40
|
||||
// CAboutDialog::GetMessageMap
|
||||
|
||||
|
@ -55,6 +55,9 @@ protected:
|
||||
// SYNTHETIC: CONFIG 0x00403de0
|
||||
// CMainDialog::`scalar deleting destructor'
|
||||
|
||||
// FUNCTION: CONFIG 0x00403e60
|
||||
// CMainDialog::_GetBaseMessageMap
|
||||
|
||||
// FUNCTION: CONFIG 0x00403e70
|
||||
// CMainDialog::GetMessageMap
|
||||
|
||||
|
@ -76,6 +76,9 @@ public:
|
||||
// SYNTHETIC: CONFIG 0x00402cd0
|
||||
// CConfigApp::`scalar deleting destructor'
|
||||
|
||||
// FUNCTION: CONFIG 0x402c20
|
||||
// CConfigApp::_GetBaseMessageMap
|
||||
|
||||
// FUNCTION: CONFIG 0x402c30
|
||||
// CConfigApp::GetMessageMap
|
||||
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include "mxticklemanager.h"
|
||||
|
||||
// GLOBAL: LEGO1 0x100f0160
|
||||
undefined4 g_unk0x100f0160;
|
||||
undefined4 g_unk0x100f0160 = 3;
|
||||
|
||||
// FUNCTION: LEGO1 0x100046a0
|
||||
GasStation::GasStation()
|
||||
|
@ -10,7 +10,7 @@
|
||||
#include "mxrect32.h"
|
||||
|
||||
// GLOBAL: LEGO1 0x101020e8
|
||||
void (*g_omniUserMessage)(const char*, int);
|
||||
void (*g_omniUserMessage)(const char*, int) = NULL;
|
||||
|
||||
// FUNCTION: LEGO1 0x100b6e10
|
||||
MxBool GetRectIntersection(
|
||||
|
@ -17,13 +17,13 @@
|
||||
#include "mxvideomanager.h"
|
||||
|
||||
// GLOBAL: LEGO1 0x101015b8
|
||||
char g_hdPath[1024];
|
||||
char g_hdPath[1024] = "";
|
||||
|
||||
// GLOBAL: LEGO1 0x101019b8
|
||||
char g_cdPath[1024];
|
||||
char g_cdPath[1024] = "E:";
|
||||
|
||||
// GLOBAL: LEGO1 0x10101db8
|
||||
MxBool g_use3dSound;
|
||||
MxBool g_use3dSound = FALSE;
|
||||
|
||||
// GLOBAL: LEGO1 0x101015b0
|
||||
MxOmni* MxOmni::g_instance = NULL;
|
||||
|
341
tools/datacmp.py
Normal file
341
tools/datacmp.py
Normal file
@ -0,0 +1,341 @@
|
||||
# (New) Data comparison.
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import logging
|
||||
from enum import Enum
|
||||
from typing import Iterable, List, NamedTuple, Optional, Tuple
|
||||
from struct import unpack
|
||||
from isledecomp.compare import Compare as IsleCompare
|
||||
from isledecomp.compare.db import MatchInfo
|
||||
from isledecomp.cvdump import Cvdump
|
||||
from isledecomp.cvdump.types import (
|
||||
CvdumpKeyError,
|
||||
CvdumpIntegrityError,
|
||||
)
|
||||
from isledecomp.bin import Bin as IsleBin
|
||||
import colorama
|
||||
|
||||
# Initialise colorama before any of the colorama.Fore / colorama.Style codes
# used by main() are printed.
# NOTE(review): presumably this is what makes the ANSI codes work on Windows
# consoles -- confirm against the colorama documentation.
colorama.init()


# Ignore all compare-db messages.
# Attaching a NullHandler gives the "isledecomp.compare" logger a handler of
# its own, so (unless another handler is configured elsewhere) its records are
# discarded instead of reaching logging's last-resort stderr output.
logging.getLogger("isledecomp.compare").addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse the command line and check that every input path exists.

    Unrecognised arguments are tolerated and discarded. If any of the four
    required paths is missing, argparse reports the error and exits.

    Returns:
        The parsed namespace with ``original``, ``recompiled``, ``pdb``,
        ``decomp_dir``, ``verbose``, ``no_color`` and ``print_rec_addr``.
    """
    cli = argparse.ArgumentParser(description="Comparing data values.")

    # (dest, metavar, help) for each required positional argument, in order.
    positionals = (
        ("original", "original-binary", "The original binary"),
        ("recompiled", "recompiled-binary", "The recompiled binary"),
        ("pdb", "recompiled-pdb", "The PDB of the recompiled binary"),
        ("decomp_dir", "decomp-dir", "The decompiled source tree"),
    )
    for dest, metavar, help_text in positionals:
        cli.add_argument(dest, metavar=metavar, help=help_text)

    cli.add_argument(
        "-v",
        "--verbose",
        action=argparse.BooleanOptionalAction,
        default=False,
        help="",
    )
    cli.add_argument(
        "--no-color", "-n", action="store_true", help="Do not color the output"
    )
    cli.add_argument(
        "--print-rec-addr",
        action="store_true",
        help="Print addresses of recompiled functions too",
    )

    parsed, _unknown = cli.parse_known_args()

    # Checked in order; cli.error() exits, so only the first failure is reported.
    path_checks = (
        (
            os.path.isfile,
            parsed.original,
            f"Original binary {parsed.original} does not exist",
        ),
        (
            os.path.isfile,
            parsed.recompiled,
            f"Recompiled binary {parsed.recompiled} does not exist",
        ),
        (os.path.isfile, parsed.pdb, f"Symbols PDB {parsed.pdb} does not exist"),
        (
            os.path.isdir,
            parsed.decomp_dir,
            f"Source directory {parsed.decomp_dir} does not exist",
        ),
    )
    for path_exists, path, complaint in path_checks:
        if not path_exists(path):
            cli.error(complaint)

    return parsed
|
||||
|
||||
|
||||
class CompareResult(Enum):
    """Overall outcome of comparing one variable.

    The member name is what gets printed for the variable.
    """

    # Every compared offset matched.
    MATCH = 1
    # At least one offset differs.
    DIFF = 2
    # The comparison could not be made; an error message is attached.
    ERROR = 3
    # At least one offset differs, but only raw bytes could be compared.
    WARN = 4
|
||||
|
||||
|
||||
class ComparedOffset(NamedTuple):
    """A single value (or member) that was compared within a variable."""

    # Byte offset of this value from the start of the variable.
    offset: int
    # name is None for scalar types
    name: Optional[str]
    # True if the original and recompiled values are considered equal.
    match: bool
    # (original, recompiled) values as they should be displayed.
    values: Tuple[str, str]
|
||||
|
||||
|
||||
class ComparisonItem(NamedTuple):
    """Each variable that was compared"""

    # Address of the variable in the original and recompiled binaries.
    orig_addr: int
    recomp_addr: int
    name: str

    # The list of items that were compared.
    # For a complex type, these are the members.
    # For a scalar type, this is a list of size one.
    # If we could not retrieve type information, this is
    # a list of size one but without any specific type.
    compared: List[ComparedOffset]

    # If present, the error message from the types parser.
    error: Optional[str] = None

    # If true, there is no type specified for this variable. (i.e. non-public)
    # In this case, we can only compare the raw bytes.
    # This is different from the situation where a type id _is_ given, but
    # we could not retrieve it for some reason. (This is an error.)
    raw_only: bool = False

    @property
    def result(self) -> CompareResult:
        """Summarise the comparison as a single CompareResult.

        An error takes priority. Otherwise the item is a MATCH when no
        compared entry differs (including when nothing was compared).
        """
        if self.error is not None:
            return CompareResult.ERROR

        has_difference = any(not entry.match for entry in self.compared)
        if not has_difference:
            return CompareResult.MATCH

        # Prefer WARN for a diff without complete type information.
        if self.raw_only:
            return CompareResult.WARN

        return CompareResult.DIFF
|
||||
|
||||
|
||||
def create_comparison_item(
    var: MatchInfo,
    compared: Optional[List[ComparedOffset]] = None,
    error: Optional[str] = None,
    raw_only: bool = False,
) -> ComparisonItem:
    """Helper to create the ComparisonItem from the fields in MatchInfo.

    The addresses and name are copied from `var`. When `compared` is omitted
    a new empty list is used.
    """
    entries = [] if compared is None else compared

    return ComparisonItem(
        orig_addr=var.orig_addr,
        recomp_addr=var.recomp_addr,
        name=var.name,
        compared=entries,
        error=error,
        raw_only=raw_only,
    )
|
||||
|
||||
|
||||
def do_the_comparison(args: argparse.Namespace) -> Iterable[ComparisonItem]:
    """Run through each variable in our compare DB, then do the comparison
    according to the variable's type. Emit the result.

    One ComparisonItem is yielded per variable:
    - an error item if the variable's type could not be resolved,
    - an initialized/uninitialized item if either read was absent or short,
    - a raw byte comparison if no type is known for the variable,
    - otherwise one entry per scalar member of the variable's type.
    """
    with IsleBin(args.original, find_str=True) as origfile, IsleBin(
        args.recompiled
    ) as recompfile:
        isle_compare = IsleCompare(origfile, recompfile, args.pdb, args.decomp_dir)

        # TODO: We don't currently retain the type information of each variable
        # in our compare DB. To get those, we build this mini-lookup table that
        # maps recomp addresses to their type.
        # We still need to build the full compare DB though, because we may
        # need the matched symbols to compare pointers (e.g. on strings)
        mini_cvdump = Cvdump(args.pdb).globals().types().run()
        type_db = mini_cvdump.types

        recomp_type_reference = {}
        for glo in mini_cvdump.globals:
            if recompfile.is_valid_section(glo.section):
                abs_addr = recompfile.get_abs_addr(glo.section, glo.offset)
                recomp_type_reference[abs_addr] = glo.type

        def pointer_display(addr: int, is_orig: bool) -> str:
            """Helper to streamline pointer textual display."""
            if addr == 0:
                return "nullptr"

            lookup = (
                isle_compare.get_by_orig if is_orig else isle_compare.get_by_recomp
            )
            target = lookup(addr)
            if target is not None:
                return f"Pointer to {target.match_name()}"

            # This variable did not match if we do not have
            # the pointer target in our DB.
            return f"Unknown pointer 0x{addr:x}"

        for var in isle_compare.get_variables():
            type_name = recomp_type_reference.get(var.recomp_addr)
            is_type_aware = type_name is not None

            if is_type_aware:
                try:
                    # If we are type-aware, we can get the precise
                    # data size for the variable.
                    data_size = type_db.get(type_name).size
                except (CvdumpKeyError, CvdumpIntegrityError) as ex:
                    yield create_comparison_item(var, error=repr(ex))
                    continue
            else:
                # Without a type we can only compare the raw bytes
                data_size = var.size

            orig_raw = origfile.read(var.orig_addr, data_size)
            recomp_raw = recompfile.read(var.recomp_addr, data_size)

            # If either read exceeded the raw data size for the section,
            # assume the entire variable is uninitialized.
            # TODO: This is not correct, strictly speaking. However,
            # it is probably impossible for a variable to exceed
            # the virtual size of the section, so all that is left is
            # the uninitialized data.
            # If the variable falls at the end of the section like this,
            # it is highly likely to be uninitialized.
            orig_missing = orig_raw is None or len(orig_raw) < data_size
            recomp_missing = recomp_raw is None or len(recomp_raw) < data_size

            # If both variables are uninitialized, we consider them equal.
            # Otherwise, this is a diff but there is nothing to compare.
            if orig_missing or recomp_missing:
                orig_value = "(uninitialized)" if orig_missing else "(initialized)"
                recomp_value = (
                    "(uninitialized)" if recomp_missing else "(initialized)"
                )
                yield create_comparison_item(
                    var,
                    compared=[
                        ComparedOffset(
                            offset=0,
                            name=None,
                            match=orig_missing and recomp_missing,
                            values=(orig_value, recomp_value),
                        )
                    ],
                )
                continue

            if not is_type_aware:
                # If there is no specific type information available
                # (i.e. if this is a static or non-public variable)
                # then we can only compare the raw bytes.
                raw_entry = ComparedOffset(
                    offset=0,
                    name="(raw)",
                    match=orig_raw == recomp_raw,
                    values=(orig_raw, recomp_raw),
                )
                yield create_comparison_item(var, compared=[raw_entry], raw_only=True)
                continue

            # If we are here, we can do the type-aware comparison.
            compare_items = type_db.get_scalars(type_name)
            format_str = type_db.get_format_string(type_name)

            orig_data = unpack(format_str, orig_raw)
            recomp_data = unpack(format_str, recomp_raw)

            compared = []
            # The format string has one value per scalar member, so the three
            # sequences line up element by element.
            for member, orig_value, recomp_value in zip(
                compare_items, orig_data, recomp_data
            ):
                if member.is_pointer:
                    match = isle_compare.is_pointer_match(orig_value, recomp_value)
                    values = (
                        pointer_display(orig_value, True),
                        pointer_display(recomp_value, False),
                    )
                else:
                    match = orig_value == recomp_value
                    values = (orig_value, recomp_value)

                compared.append(
                    ComparedOffset(
                        offset=member.offset,
                        name=member.name,
                        match=match,
                        values=values,
                    )
                )

            yield create_comparison_item(var, compared=compared)
|
||||
|
||||
|
||||
def value_get(value: Optional[str], default: str):
    """Return `value`, or `default` when `value` is None.

    Only None triggers the fallback; an empty string is returned unchanged.
    """
    if value is None:
        return default

    return value
|
||||
|
||||
|
||||
def main():
    """Run the data comparison and print a report to stdout.

    Matching variables and matching members are only listed when --verbose
    is given. For a mismatch both the original and recompiled values are shown.
    """
    args = parse_args()

    # Anything not listed here (DIFF, ERROR) is shown in red.
    palette = {
        CompareResult.MATCH: colorama.Fore.GREEN,
        CompareResult.WARN: colorama.Fore.YELLOW,
    }

    def display_match(result: CompareResult) -> str:
        """Helper to return color string or not, depending on user preference"""
        if args.no_color:
            return result.name

        match_color = palette.get(result, colorama.Fore.RED)
        return f"{match_color}{result.name}{colorama.Style.RESET_ALL}"

    for item in do_the_comparison(args):
        if not args.verbose and item.result == CompareResult.MATCH:
            continue

        if args.print_rec_addr:
            address_display = f"0x{item.orig_addr:x} / 0x{item.recomp_addr:x}"
        else:
            address_display = f"0x{item.orig_addr:x}"

        print(f"{item.name[:80]} ({address_display}) ... {display_match(item.result)} ")
        if item.error is not None:
            print(f" {item.error}")

        for c in item.compared:
            if not args.verbose and c.match:
                continue

            (value_a, value_b) = c.values
            line = f" {c.offset:5} {value_get(c.name, '(value)'):30} {value_a}"
            if not c.match:
                # Show the recompiled value only when it differs.
                line += f" : {value_b}"
            print(line)

        print()
|
||||
|
||||
|
||||
# Only run the comparison when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
@ -95,6 +95,7 @@ class Compare:
|
||||
.publics()
|
||||
.symbols()
|
||||
.section_contributions()
|
||||
.types()
|
||||
.run()
|
||||
)
|
||||
res = CvdumpAnalysis(cv)
|
||||
@ -454,6 +455,25 @@ class Compare:
|
||||
|
||||
## Public API
|
||||
|
||||
def is_pointer_match(self, orig_addr, recomp_addr) -> bool:
    """Check whether these pointers point at the same thing

    Two null pointers match. Otherwise the original address must have an
    entry in the compare DB whose recompiled address equals `recomp_addr`.
    """
    # Null pointers considered matching
    if orig_addr == recomp_addr == 0:
        return True

    target = self._db.get_by_orig(orig_addr)
    return target is not None and target.recomp_addr == recomp_addr
|
||||
|
||||
def get_by_orig(self, addr: int) -> Optional[MatchInfo]:
    """Look up the compare DB entry for an original-binary address.

    Returns whatever the DB's get_by_orig() returns for `addr`.
    """
    entry = self._db.get_by_orig(addr)
    return entry
|
||||
|
||||
def get_by_recomp(self, addr: int) -> Optional[MatchInfo]:
    """Look up the compare DB entry for a recompiled-binary address.

    Returns whatever the DB's get_by_recomp() returns for `addr`.
    """
    entry = self._db.get_by_recomp(addr)
    return entry
|
||||
|
||||
def get_all(self) -> List[MatchInfo]:
    """Return the result of the compare DB's get_all() query."""
    entries = self._db.get_all()
    return entries
|
||||
|
||||
@ -463,6 +483,9 @@ class Compare:
|
||||
def get_vtables(self) -> List[MatchInfo]:
    """Return the compare DB matches whose symbol type is VTABLE."""
    wanted = SymbolType.VTABLE
    return self._db.get_matches_by_type(wanted)
|
||||
|
||||
def get_variables(self) -> List[MatchInfo]:
    """Return the compare DB matches whose symbol type is DATA."""
    wanted = SymbolType.DATA
    return self._db.get_matches_by_type(wanted)
|
||||
|
||||
def compare_address(self, addr: int) -> Optional[DiffReport]:
|
||||
match = self._db.get_one_match(addr)
|
||||
if match is None:
|
||||
|
@ -1,3 +1,4 @@
|
||||
from .analysis import CvdumpAnalysis
|
||||
from .parser import CvdumpParser
|
||||
from .runner import Cvdump
|
||||
from .types import CvdumpTypesParser
|
||||
|
@ -1,45 +1,9 @@
|
||||
"""For collating the results from parsing cvdump.exe into a more directly useful format."""
|
||||
from typing import List, Optional, Tuple
|
||||
from typing import List, Optional
|
||||
from isledecomp.types import SymbolType
|
||||
from .parser import CvdumpParser
|
||||
from .demangler import demangle_string_const, demangle_vtable
|
||||
|
||||
|
||||
def data_type_info(type_name: str) -> Optional[Tuple[int, bool]]:
    """cvdump type aliases are listed here:
    https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
    For the given type, return tuple(size, is_pointer) if possible.

    Returns None for names that do not start with "T" and for built-in
    names that none of the rules below recognise."""
    # TODO: refactor to be as simple as possible

    # Ignore complex types. We can get the size of those from the TYPES section.
    if not type_name.startswith("T"):
        return None

    # if 32-bit pointer
    if type_name.startswith("T_32P"):
        return (4, True)

    # Checked in this order; the first matching suffix group wins.
    sizes_by_suffix = (
        (("QUAD", "64"), 8),
        (("LONG", "INT4", "32"), 4),
        (("SHORT", "WCHAR"), 2),
    )
    for suffixes, size in sizes_by_suffix:
        if type_name.endswith(suffixes):
            return (size, False)

    if "CHAR" in type_name:
        return (1, False)

    if type_name in ("T_NOTYPE", "T_VOID"):
        return (0, False)

    return None
|
||||
from .types import CvdumpKeyError, CvdumpIntegrityError
|
||||
|
||||
|
||||
class CvdumpNode:
|
||||
@ -146,11 +110,21 @@ class CvdumpAnalysis:
|
||||
node_dict[key].node_type = SymbolType.DATA
|
||||
node_dict[key].friendly_name = glo.name
|
||||
|
||||
if (g_info := data_type_info(glo.type)) is not None:
|
||||
(size, is_pointer) = g_info
|
||||
node_dict[key].confirmed_size = size
|
||||
if is_pointer:
|
||||
node_dict[key].node_type = SymbolType.POINTER
|
||||
try:
|
||||
# Check our types database for type information.
|
||||
# If we did not parse the TYPES section, we can only
|
||||
# get information for built-in "T_" types.
|
||||
g_info = parser.types.get(glo.type)
|
||||
node_dict[key].confirmed_size = g_info.size
|
||||
# Previously we set the symbol type to POINTER here if
|
||||
# the variable was known to be a pointer. We can derive this
|
||||
# information later when it's time to compare the variable,
|
||||
# so let's set these to symbol type DATA instead.
|
||||
# POINTER will be reserved for non-variable pointer data.
|
||||
# e.g. thunks, unwind section.
|
||||
except (CvdumpKeyError, CvdumpIntegrityError):
|
||||
# No big deal if we don't have complete type information.
|
||||
pass
|
||||
|
||||
for lin in parser.lines:
|
||||
key = (lin.section, lin.offset)
|
||||
|
@ -1,9 +1,10 @@
|
||||
import re
|
||||
from typing import Iterable, Tuple
|
||||
from collections import namedtuple
|
||||
from .types import CvdumpTypesParser
|
||||
|
||||
# e.g. `*** PUBLICS`
|
||||
_section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)")
|
||||
_section_change_regex = re.compile(r"^\*\*\* (?P<section>[A-Z/ ]+)$")
|
||||
|
||||
# e.g. ` 27 00034EC0 28 00034EE2 29 00034EE7 30 00034EF4`
|
||||
_line_addr_pairs_findall = re.compile(r"\s+(?P<line_no>\d+) (?P<addr>[A-F0-9]{8})")
|
||||
@ -76,6 +77,8 @@ class CvdumpParser:
|
||||
self.globals = []
|
||||
self.modules = []
|
||||
|
||||
self.types = CvdumpTypesParser()
|
||||
|
||||
def _lines_section(self, line: str):
|
||||
"""Parsing entries from the LINES section. We only care about the pairs of
|
||||
line_number and address and the subsection header to indicate which code file
|
||||
@ -198,6 +201,9 @@ class CvdumpParser:
|
||||
elif self._section == "MODULES":
|
||||
self._modules_section(line)
|
||||
|
||||
elif self._section == "TYPES":
|
||||
self.types.read_line(line)
|
||||
|
||||
def read_lines(self, lines: Iterable[str]):
    """Feed each line of `lines`, in order, to read_line()."""
    consume = self.read_line
    for text in lines:
        consume(text)
|
||||
|
@ -14,6 +14,7 @@ class DumpOpt(Enum):
|
||||
PUBLICS = 3
|
||||
SECTION_CONTRIB = 4
|
||||
MODULES = 5
|
||||
TYPES = 6
|
||||
|
||||
|
||||
cvdump_opt_map = {
|
||||
@ -23,6 +24,7 @@ cvdump_opt_map = {
|
||||
DumpOpt.PUBLICS: "-p",
|
||||
DumpOpt.SECTION_CONTRIB: "-seccontrib",
|
||||
DumpOpt.MODULES: "-m",
|
||||
DumpOpt.TYPES: "-t",
|
||||
}
|
||||
|
||||
|
||||
@ -55,6 +57,10 @@ class Cvdump:
|
||||
self._options.add(DumpOpt.MODULES)
|
||||
return self
|
||||
|
||||
def types(self):
    """Request the TYPES dump and return self so calls can be chained."""
    requested = self._options
    requested.add(DumpOpt.TYPES)
    return self
|
||||
|
||||
def cmd_line(self) -> List[str]:
|
||||
cvdump_exe = lib_path_join("cvdump.exe")
|
||||
flags = [cvdump_opt_map[opt] for opt in self._options]
|
||||
|
433
tools/isledecomp/isledecomp/cvdump/types.py
Normal file
433
tools/isledecomp/isledecomp/cvdump/types.py
Normal file
@ -0,0 +1,433 @@
|
||||
import re
|
||||
from typing import Dict, Iterator, List, NamedTuple, Optional
|
||||
|
||||
|
||||
class CvdumpTypeError(Exception):
    """A type record was not of the kind the operation requires."""
|
||||
|
||||
|
||||
class CvdumpKeyError(KeyError):
    """A type key was not found among the parsed types."""
|
||||
|
||||
|
||||
class CvdumpIntegrityError(Exception):
    """A parsed type record is missing information needed to resolve it."""
|
||||
|
||||
|
||||
class FieldListItem(NamedTuple):
    """Member of a class or structure"""

    # Byte offset of the member within its parent.
    offset: int
    # Member name.
    name: str
    # Type key of the member: a built-in "T_" name or a hex type id.
    type: str
|
||||
|
||||
|
||||
class ScalarType(NamedTuple):
    """A scalar value at a byte offset, with its type name.

    The properties are all derived from the type name.
    """

    # Byte offset of the scalar.
    offset: int
    # Member name, or None when the value has no name of its own.
    name: Optional[str]
    # Type name of the scalar.
    type: str

    @property
    def size(self) -> int:
        """Size of the scalar in bytes."""
        return scalar_type_size(self.type)

    @property
    def format_char(self) -> str:
        """struct format character used to unpack the scalar."""
        return scalar_type_format_char(self.type)

    @property
    def is_pointer(self) -> bool:
        """True if the scalar is a 32-bit pointer type."""
        return scalar_type_pointer(self.type)
|
||||
|
||||
|
||||
class TypeInfo(NamedTuple):
    """Resolved description of a type: its key, size and (if any) members."""

    # Type key this information was resolved for.
    key: str
    # Total size of the type in bytes.
    size: int
    # Type name, when one was recorded.
    name: Optional[str] = None
    # Members of the type; None marks a scalar.
    members: Optional[List[FieldListItem]] = None

    def is_scalar(self) -> bool:
        """Return True if this type has no member list at all."""
        # TODO: distinction between a class with zero members and no vtable?
        has_member_list = self.members is not None
        return not has_member_list
|
||||
|
||||
|
||||
def normalize_type_id(key: str) -> str:
    """Helper for TYPES parsing to ensure a consistent format.
    If key begins with "T_" it is a built-in type.
    Else it is a hex string. We prefer lower case letters and
    no leading zeroes. (UDT identifier pads to 8 characters.)"""
    if not key.startswith("T_"):
        # Re-render the number as "0x" plus lower-case hex digits.
        return f"{int(key, 16):#x}"

    # Remove numeric value for "T_" type. We don't use this.
    name, _, _ = key.partition("(")
    return name
|
||||
|
||||
|
||||
# Built-in types whose width is given as a number in the name rather than as
# a C type name (see cvinfo.h). The substring checks in scalar_type_size() do
# not recognise these, so without this table they would all be reported as
# 4 bytes wide.
_EXPLICIT_WIDTH_SIZES = {
    "T_BOOL08": 1,
    "T_INT1": 1,
    "T_UINT1": 1,
    "T_BOOL16": 2,
    "T_INT2": 2,
    "T_UINT2": 2,
    "T_INT8": 8,
    "T_UINT8": 8,
}


def scalar_type_pointer(type_name: str) -> bool:
    """Return True if the built-in type name is a 32-bit pointer (T_32P...)."""
    return type_name.startswith("T_32P")


def scalar_type_size(type_name: str) -> int:
    """Return the size in bytes of the given built-in ("T_") type name.

    Pointers are 4 bytes. Explicit-width names such as T_BOOL08 or T_INT8
    use their declared width. Remaining names are classified by substring,
    and anything unrecognised is assumed to be 4 bytes.
    """
    if scalar_type_pointer(type_name):
        return 4

    # Tolerate a trailing "(nnnn)" numeric suffix on the name.
    explicit_size = _EXPLICIT_WIDTH_SIZES.get(type_name.partition("(")[0])
    if explicit_size is not None:
        return explicit_size

    if "CHAR" in type_name:
        return 2 if "WCHAR" in type_name else 1

    if "SHORT" in type_name:
        return 2

    if "QUAD" in type_name or "64" in type_name:
        return 8

    return 4


def scalar_type_signed(type_name: str) -> bool:
    """Return True if values of the given built-in type are read as signed."""
    if scalar_type_pointer(type_name):
        return False

    # According to cvinfo.h, T_WCHAR is unsigned
    return not type_name.startswith("T_U") and not type_name.startswith("T_W")


def scalar_type_format_char(type_name: str) -> str:
    """Return the struct format character for the given built-in type.

    The character's width always agrees with scalar_type_size() for integer
    types; upper case is used for unsigned values.
    """
    if scalar_type_pointer(type_name):
        return "L"

    # "Really a char"
    if type_name.startswith("T_RCHAR"):
        return "c"

    # floats
    if type_name.startswith("T_REAL"):
        return "d" if "64" in type_name else "f"

    size = scalar_type_size(type_name)
    char = ({1: "b", 2: "h", 4: "l", 8: "q"}).get(size, "l")

    return char if scalar_type_signed(type_name) else char.upper()
|
||||
|
||||
|
||||
def member_string_iter(
    members: List[ScalarType], size: Optional[int] = None
) -> Iterator[str]:
    """Yield the pieces of a struct format string for the given scalars.

    Pad bytes ("x") are emitted for any gap before a member, and after the
    last member up to `size` bytes when `size` is given. With no members,
    a single run of `size` pad bytes is produced.

    Members are expected in ascending offset order; a member that starts
    before the previous one ends gets no padding.
    """
    if len(members) == 0:
        yield "x" * (size or 0)
        # Stop here: falling through would emit the trailing padding for
        # `size` a second time.
        return

    last_offset = 0
    last_size = 0
    for m in members:
        padding = m.offset - last_offset - last_size
        if padding > 0:
            yield "x" * padding

        yield m.format_char
        last_offset = m.offset
        last_size = m.size

    if size is not None:
        padding = size - (last_offset + last_size)
        if padding > 0:
            yield "x" * padding
|
||||
|
||||
|
||||
def member_list_to_struct_string(
    members: List[ScalarType], size: Optional[int] = None
) -> str:
    """Create a string for use with struct.unpack
    Will pad to `size` bytes if present.

    With no members the result is only pad bytes and carries no byte-order
    prefix. Otherwise it is prefixed with "<"."""
    if not members:
        return "x" * (size or 0)

    body = "".join(member_string_iter(members, size))
    return "<" + body if body else ""
|
||||
|
||||
|
||||
def join_member_names(parent: str, child: Optional[str]) -> str:
    """Helper method to combine parent/child member names.
    Child member name is None if the child is a scalar type."""
    if child is None:
        return parent

    # If the child is an array index, join without the dot
    separator = "" if child.startswith("[") else "."
    return f"{parent}{separator}{child}"
|
||||
|
||||
|
||||
class CvdumpTypesParser:
|
||||
"""Parser for cvdump output, TYPES section.
|
||||
Tricky enough that it demands its own parser."""
|
||||
|
||||
# Marks the start of a new type
|
||||
INDEX_RE = re.compile(r"(?P<key>0x\w+) : .* (?P<type>LF_\w+)")
|
||||
|
||||
# LF_FIELDLIST class/struct member (1/2)
|
||||
LIST_RE = re.compile(
|
||||
r"\s+list\[\d+\] = LF_MEMBER, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
|
||||
)
|
||||
|
||||
# LF_FIELDLIST vtable indicator
|
||||
VTABLE_RE = re.compile(r"^\s+list\[\d+\] = LF_VFUNCTAB")
|
||||
|
||||
# LF_FIELDLIST superclass indicator
|
||||
SUPERCLASS_RE = re.compile(
|
||||
r"^\s+list\[\d+\] = LF_BCLASS, (?P<scope>\w+), type = (?P<type>.*), offset = (?P<offset>\d+)"
|
||||
)
|
||||
|
||||
# LF_FIELDLIST member name (2/2)
|
||||
MEMBER_RE = re.compile(r"^\s+member name = '(?P<name>.*)'$")
|
||||
|
||||
# LF_ARRAY element type
|
||||
ARRAY_ELEMENT_RE = re.compile(r"^\s+Element type = (?P<type>.*)")
|
||||
|
||||
# LF_ARRAY total array size
|
||||
ARRAY_LENGTH_RE = re.compile(r"^\s+length = (?P<length>\d+)")
|
||||
|
||||
# LF_CLASS/LF_STRUCTURE field list reference
|
||||
CLASS_FIELD_RE = re.compile(
|
||||
r"^\s+# members = \d+, field list type (?P<field_type>0x\w+),"
|
||||
)
|
||||
|
||||
# LF_CLASS/LF_STRUCTURE name and other info
|
||||
CLASS_NAME_RE = re.compile(
|
||||
r"^\s+Size = (?P<size>\d+), class name = (?P<name>.+), UDT\((?P<udt>0x\w+)\)"
|
||||
)
|
||||
|
||||
# LF_MODIFIER, type being modified
|
||||
MODIFIES_RE = re.compile(r".*modifies type (?P<type>.*)$")
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.mode = ""
|
||||
self.last_key = ""
|
||||
self.keys = {}
|
||||
|
||||
def _new_type(self):
|
||||
"""Prepare a new dict for the type we just parsed.
|
||||
The id is self.last_key and the "type" of type is self.mode.
|
||||
e.g. LF_CLASS"""
|
||||
self.keys[self.last_key] = {"type": self.mode}
|
||||
|
||||
def _set(self, key: str, value):
|
||||
self.keys[self.last_key][key] = value
|
||||
|
||||
def _add_member(self, offset: int, type_: str):
|
||||
obj = self.keys[self.last_key]
|
||||
if "members" not in obj:
|
||||
obj["members"] = []
|
||||
|
||||
obj["members"].append({"offset": offset, "type": type_})
|
||||
|
||||
def _set_member_name(self, name: str):
|
||||
"""Set name for most recently added member."""
|
||||
obj = self.keys[self.last_key]
|
||||
obj["members"][-1]["name"] = name
|
||||
|
||||
def _get_field_list(self, type_obj: Dict) -> List[FieldListItem]:
    """Return the field list for the given LF_CLASS/LF_STRUCTURE reference

    `type_obj` may also be an LF_FIELDLIST record itself. Members inherited
    from the superclass (if any) are included, and the result is sorted by
    offset.

    Raises:
        CvdumpKeyError: if the record has no field list reference, or the
            referenced field list was not parsed.
    """

    if type_obj.get("type") == "LF_FIELDLIST":
        field_obj = type_obj
    else:
        field_list_type = type_obj.get("field_list_type")
        field_obj = self.keys.get(field_list_type)
        if field_obj is None:
            # Report this the same way as any other unknown type so that
            # callers catching CvdumpKeyError can carry on.
            raise CvdumpKeyError(field_list_type)

    members: List[FieldListItem] = []

    super_id = field_obj.get("super")
    if super_id is not None:
        # May need to resolve forward ref.
        superclass = self.get(super_id)
        if superclass.members is not None:
            # Copy instead of aliasing: the superclass's own list must not
            # grow when this type's members are added below.
            members.extend(superclass.members)

    members.extend(
        FieldListItem(
            offset=m["offset"],
            type=m["type"],
            name=m["name"],
        )
        for m in field_obj.get("members", [])
    )

    return sorted(members, key=lambda m: m.offset)
|
||||
|
||||
def _mock_array_members(self, type_obj: Dict) -> List[FieldListItem]:
    """LF_ARRAY elements provide the element type and the total size.
    We want the list of "members" as if this was a struct.

    Each element becomes a member named "[i]" at offset i * element size.

    Raises:
        CvdumpTypeError: if `type_obj` is not an LF_ARRAY record.
        CvdumpIntegrityError: if the element type or total length is
            missing, or the element type has no usable size.
    """

    if type_obj.get("type") != "LF_ARRAY":
        raise CvdumpTypeError("Type is not an LF_ARRAY")

    array_type = type_obj.get("array_type")
    if array_type is None:
        raise CvdumpIntegrityError("No array element type")

    array_element_size = self.get(array_type).size

    array_size = type_obj.get("size")
    if array_size is None:
        raise CvdumpIntegrityError("No array length")

    # A missing or zero element size cannot be divided into the total length.
    if not array_element_size:
        raise CvdumpIntegrityError("Array element type has no size")

    n_elements = array_size // array_element_size

    return [
        FieldListItem(
            offset=i * array_element_size,
            type=array_type,
            name=f"[{i}]",
        )
        for i in range(n_elements)
    ]
|
||||
|
||||
def get(self, type_key: str) -> TypeInfo:
    """Convert our dictionary values read from the cvdump output
    into a consistent format for the given type.

    Raises:
        CvdumpKeyError: if `type_key` is not a built-in and was not parsed.
        CvdumpIntegrityError: if a forward reference has no target.
    """

    # Scalar type. Handled here because it makes the recursive steps
    # much simpler.
    if type_key.startswith("T_"):
        return TypeInfo(key=type_key, size=scalar_type_size(type_key))

    # Go to our dictionary to find it.
    obj = self.keys.get(type_key.lower())
    if obj is None:
        raise CvdumpKeyError(type_key)

    leaf = obj.get("type")

    # These type references are just a wrapper around a scalar
    if leaf == "LF_ENUM":
        return self.get("T_INT4")

    if leaf == "LF_POINTER":
        return self.get("T_32PVOID")

    if obj.get("is_forward_ref", False):
        # Get the forward reference to follow.
        # If this is LF_CLASS/LF_STRUCTURE, it is the UDT value.
        # For LF_MODIFIER, it is the type being modified.
        forward_ref = obj.get("udt", None) or obj.get("modifies", None)
        if forward_ref is None:
            raise CvdumpIntegrityError(f"Null forward ref for type {type_key}")

        return self.get(forward_ref)

    # Else it is not a forward reference, so build out the object here.
    members = (
        self._mock_array_members(obj)
        if leaf == "LF_ARRAY"
        else self._get_field_list(obj)
    )

    return TypeInfo(
        key=type_key,
        size=obj.get("size"),
        name=obj.get("name"),
        members=members,
    )
|
||||
|
||||
def get_by_name(self, name: str) -> TypeInfo:
    """Find the complex type with the given name.

    Not implemented yet; always raises NotImplementedError."""
    # TODO
    raise NotImplementedError
|
||||
|
||||
def get_scalars(self, type_key: str) -> List[ScalarType]:
    """Reduce the given type to a list of scalars so we can
    compare each component value.

    Nested members are flattened recursively. Each scalar's offset is
    relative to the start of `type_key`, and its name is the member path."""

    info = self.get(type_key)
    if info.is_scalar():
        # Use obj.key here for alias types like LF_POINTER
        return [ScalarType(offset=0, type=info.key, name=None)]

    # mypy?
    assert info.members is not None

    # Dedupe repeated offsets if this is a union type
    # (the last member seen at an offset is the one kept).
    member_at_offset = {}
    for member in info.members:
        member_at_offset[member.offset] = member

    scalars = []
    for member in member_at_offset.values():
        for inner in self.get_scalars(member.type):
            scalars.append(
                ScalarType(
                    offset=member.offset + inner.offset,
                    type=inner.type,
                    name=join_member_names(member.name, inner.name),
                )
            )

    return scalars
|
||||
|
||||
def get_format_string(self, type_key: str) -> str:
    """Return the struct.unpack format string for the given type."""
    # We need both to pad the data to size
    total_size = self.get(type_key).size
    scalars = self.get_scalars(type_key)
    return member_list_to_struct_string(scalars, total_size)
|
||||
|
||||
def read_line(self, line: str):
    """Process one line of cvdump TYPES output and update the parser state.

    A line matching INDEX_RE begins a new type record: the normalized key
    and the leaf type are remembered in self.last_key and self.mode, and
    self._new_type() is called. Every line is then interpreted according
    to the current self.mode, storing whatever values it carries through
    self._set, self._add_member and self._set_member_name.
    """
    header = self.INDEX_RE.match(line)
    if header is not None:
        self.last_key = normalize_type_id(header.group("key"))
        self.mode = header.group("type")
        self._new_type()

        # We don't need to read anything else from here (for now)
        if self.mode in ("LF_ENUM", "LF_POINTER"):
            self._set("size", 4)

    if self.mode == "LF_MODIFIER":
        modifier = self.MODIFIES_RE.match(line)
        if modifier is not None:
            # Flagged as a forward ref for convenience: following the
            # modified type is essentially the same thing as following
            # an LF_CLASS forward ref.
            self._set("is_forward_ref", True)
            self._set("modifies", normalize_type_id(modifier.group("type")))

    if self.mode == "LF_ARRAY":
        element = self.ARRAY_ELEMENT_RE.match(line)
        if element is not None:
            self._set("array_type", normalize_type_id(element.group("type")))

        length = self.ARRAY_LENGTH_RE.match(line)
        if length is not None:
            self._set("size", int(length.group("length")))

    if self.mode == "LF_FIELDLIST":
        # If this class has a vtable, create a mock member at offset 0
        if self.VTABLE_RE.match(line) is not None:
            # For our purposes, any pointer type will do
            self._add_member(0, "T_32PVOID")
            self._set_member_name("vftable")

        # Superclass is set here in the fieldlist rather than in LF_CLASS
        superclass = self.SUPERCLASS_RE.match(line)
        if superclass is not None:
            self._set("super", normalize_type_id(superclass.group("type")))

        # Member offset and type given on the first of two lines.
        member = self.LIST_RE.match(line)
        if member is not None:
            member_offset = int(member.group("offset"))
            member_type = normalize_type_id(member.group("type"))
            self._add_member(member_offset, member_type)

        # Name of the member read on the second of two lines.
        member_name = self.MEMBER_RE.match(line)
        if member_name is not None:
            self._set_member_name(member_name.group("name"))

    if self.mode in ("LF_STRUCTURE", "LF_CLASS"):
        # Match the reference to the associated LF_FIELDLIST
        fields = self.CLASS_FIELD_RE.match(line)
        if fields is not None:
            field_type = fields.group("field_type")
            if field_type != "0x0000":
                self._set("field_list_type", normalize_type_id(field_type))
            else:
                # Not redundant. UDT might not match the key.
                # These cases get reported as UDT mismatch.
                self._set("is_forward_ref", True)

        # Last line has the vital information.
        # If this is a FORWARD REF, we need to follow the UDT pointer
        # to get the actual class details.
        summary = self.CLASS_NAME_RE.match(line)
        if summary is not None:
            self._set("name", summary.group("name"))
            self._set("udt", normalize_type_id(summary.group("udt")))
            self._set("size", int(summary.group("size")))
|
@ -1,39 +1,59 @@
|
||||
import pytest
|
||||
from isledecomp.cvdump.analysis import data_type_info
|
||||
from isledecomp.cvdump.types import (
|
||||
scalar_type_size,
|
||||
scalar_type_pointer,
|
||||
scalar_type_signed,
|
||||
)
|
||||
|
||||
# These are all the types seen in the cvdump.
# We have char, short, int, long, and long long represented in both signed
# and unsigned forms, plus the floating point types float and double.
# We can also identify a 4 byte pointer with the T_32P prefix.
# The type T_VOID is used to designate a function's return type.
# T_NOTYPE is specified as the type of "this" for a static function in a class.

# For reference: https://github.com/microsoft/microsoft-pdb/blob/master/include/cvinfo.h
|
||||
|
||||
# fmt: off
# Expected properties of each scalar type name, shared by the parametrized
# tests in this module.
# Fields are: type_name, size, is_signed, is_pointer
type_check_cases = (
    ("T_32PINT4",   4, False, True),
    ("T_32PLONG",   4, False, True),
    ("T_32PRCHAR",  4, False, True),
    ("T_32PREAL32", 4, False, True),
    ("T_32PUCHAR",  4, False, True),
    ("T_32PUINT4",  4, False, True),
    ("T_32PULONG",  4, False, True),
    ("T_32PUSHORT", 4, False, True),
    ("T_32PVOID",   4, False, True),
    ("T_CHAR",      1, True,  False),
    ("T_INT4",      4, True,  False),
    ("T_LONG",      4, True,  False),
    ("T_QUAD",      8, True,  False),
    ("T_RCHAR",     1, True,  False),
    ("T_REAL32",    4, True,  False),
    ("T_REAL64",    8, True,  False),
    ("T_SHORT",     2, True,  False),
    ("T_UCHAR",     1, False, False),
    ("T_UINT4",     4, False, False),
    ("T_ULONG",     4, False, False),
    ("T_UQUAD",     8, False, False),
    ("T_USHORT",    2, False, False),
    ("T_WCHAR",     2, False, False),
)
# fmt: on
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type_name, size, _, is_pointer", type_check_cases)
def test_type_check(type_name: str, size: int, _, is_pointer: bool):
    """data_type_info should report the expected size and pointer flag
    for every scalar type name in the table."""
    # Each row of type_check_cases has four fields
    # (type_name, size, is_signed, is_pointer). The signedness field is
    # not checked here, so it is bound to a placeholder argument.
    info = data_type_info(type_name)
    assert info is not None
    assert info[0] == size
    assert info[1] == is_pointer
|
||||
@pytest.mark.parametrize("type_name, size, _, __", type_check_cases)
def test_scalar_size(type_name: str, size: int, _, __):
    """scalar_type_size returns the size listed in the table."""
    reported_size = scalar_type_size(type_name)
    assert reported_size == size
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type_name, _, is_signed, __", type_check_cases)
def test_scalar_signed(type_name: str, _, is_signed: bool, __):
    """scalar_type_signed returns the signedness listed in the table."""
    reported_signed = scalar_type_signed(type_name)
    assert reported_signed == is_signed
|
||||
|
||||
|
||||
@pytest.mark.parametrize("type_name, _, __, is_pointer", type_check_cases)
def test_scalar_pointer(type_name: str, _, __, is_pointer: bool):
    """scalar_type_pointer returns the pointer flag listed in the table."""
    reported_pointer = scalar_type_pointer(type_name)
    assert reported_pointer == is_pointer
|
||||
|
452
tools/isledecomp/tests/test_cvdump_types.py
Normal file
452
tools/isledecomp/tests/test_cvdump_types.py
Normal file
@ -0,0 +1,452 @@
|
||||
"""Specifically testing the Cvdump TYPES parser
|
||||
and type dependency tree walker."""
|
||||
|
||||
import pytest
|
||||
from isledecomp.cvdump.types import (
|
||||
CvdumpTypesParser,
|
||||
CvdumpKeyError,
|
||||
CvdumpIntegrityError,
|
||||
)
|
||||
|
||||
TEST_LINES = """
|
||||
0x1028 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
||||
const, modifies type T_REAL32(0040)
|
||||
|
||||
0x103b : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = T_REAL32(0040)
|
||||
Index type = T_SHORT(0011)
|
||||
length = 16
|
||||
Name =
|
||||
|
||||
0x103c : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x103B
|
||||
Index type = T_SHORT(0011)
|
||||
length = 64
|
||||
Name =
|
||||
|
||||
0x10e0 : Length = 86, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
|
||||
member name = 'x'
|
||||
list[1] = LF_MEMBER, public, type = T_REAL32(0040), offset = 0
|
||||
member name = 'dvX'
|
||||
list[2] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
|
||||
member name = 'y'
|
||||
list[3] = LF_MEMBER, public, type = T_REAL32(0040), offset = 4
|
||||
member name = 'dvY'
|
||||
list[4] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
|
||||
member name = 'z'
|
||||
list[5] = LF_MEMBER, public, type = T_REAL32(0040), offset = 8
|
||||
member name = 'dvZ'
|
||||
|
||||
0x10e1 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 6, field list type 0x10e0,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 12, class name = _D3DVECTOR, UDT(0x000010e1)
|
||||
|
||||
0x10e4 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = T_UCHAR(0020)
|
||||
Index type = T_SHORT(0011)
|
||||
length = 8
|
||||
Name =
|
||||
|
||||
0x10ea : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x1028
|
||||
Index type = T_SHORT(0011)
|
||||
length = 12
|
||||
Name =
|
||||
|
||||
0x11f0 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
0x11f2 : Length = 10, Leaf = 0x1001 LF_MODIFIER
|
||||
const, modifies type 0x11F0
|
||||
|
||||
0x1213 : Length = 530, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_METHOD, count = 5, list = 0x1203, name = 'MxRect32'
|
||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x1205, name = 'operator='
|
||||
list[2] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'Intersect'
|
||||
list[3] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SetPoint'
|
||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'AddPoint'
|
||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x1207, name = 'SubtractPoint'
|
||||
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x11F5, name = 'UpdateBounds'
|
||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x1209, name = 'IsValid'
|
||||
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x120A, name = 'IntersectsWith'
|
||||
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetWidth'
|
||||
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetHeight'
|
||||
list[11] = LF_ONEMETHOD, public, VANILLA, index = 0x120C, name = 'GetPoint'
|
||||
list[12] = LF_ONEMETHOD, public, VANILLA, index = 0x120D, name = 'GetSize'
|
||||
list[13] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetLeft'
|
||||
list[14] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetTop'
|
||||
list[15] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetRight'
|
||||
list[16] = LF_ONEMETHOD, public, VANILLA, index = 0x120B, name = 'GetBottom'
|
||||
list[17] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetLeft'
|
||||
list[18] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetTop'
|
||||
list[19] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetRight'
|
||||
list[20] = LF_ONEMETHOD, public, VANILLA, index = 0x120E, name = 'SetBottom'
|
||||
list[21] = LF_METHOD, count = 3, list = 0x1211, name = 'CopyFrom'
|
||||
list[22] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Min'
|
||||
list[23] = LF_ONEMETHOD, private, STATIC, index = 0x1212, name = 'Max'
|
||||
list[24] = LF_MEMBER, private, type = T_INT4(0074), offset = 0
|
||||
member name = 'm_left'
|
||||
list[25] = LF_MEMBER, private, type = T_INT4(0074), offset = 4
|
||||
member name = 'm_top'
|
||||
list[26] = LF_MEMBER, private, type = T_INT4(0074), offset = 8
|
||||
member name = 'm_right'
|
||||
list[27] = LF_MEMBER, private, type = T_INT4(0074), offset = 12
|
||||
member name = 'm_bottom'
|
||||
|
||||
0x1214 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 34, field list type 0x1213, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 16, class name = MxRect32, UDT(0x00001214)
|
||||
|
||||
0x1220 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
0x14db : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = MxString, UDT(0x00004db6)
|
||||
|
||||
0x19b0 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 0, field list type 0x0000, FORWARD REF,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 0, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
0x19b1 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x19B0
|
||||
Index type = T_SHORT(0011)
|
||||
length = 440
|
||||
Name =
|
||||
|
||||
0x2a75 : Length = 98, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_MEMBER, public, type = T_32PRCHAR(0470), offset = 0
|
||||
member name = 'm_name'
|
||||
list[1] = LF_MEMBER, public, type = T_INT4(0074), offset = 4
|
||||
member name = 'm_red'
|
||||
list[2] = LF_MEMBER, public, type = T_INT4(0074), offset = 8
|
||||
member name = 'm_green'
|
||||
list[3] = LF_MEMBER, public, type = T_INT4(0074), offset = 12
|
||||
member name = 'm_blue'
|
||||
list[4] = LF_MEMBER, public, type = T_INT4(0074), offset = 16
|
||||
member name = 'm_unk0x10'
|
||||
|
||||
0x2a76 : Length = 34, Leaf = 0x1505 LF_STRUCTURE
|
||||
# members = 5, field list type 0x2a75,
|
||||
Derivation list type 0x0000, VT shape type 0x0000
|
||||
Size = 20, class name = ROIColorAlias, UDT(0x00002a76)
|
||||
|
||||
0x22d4 : Length = 154, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x20FC
|
||||
list[1] = LF_METHOD, count = 3, list = 0x22D0, name = 'MxVariable'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F0F,
|
||||
vfptr offset = 0, name = 'GetValue'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F10,
|
||||
vfptr offset = 4, name = 'SetValue'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x1F11,
|
||||
vfptr offset = 8, name = '~MxVariable'
|
||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x22D3, name = 'GetKey'
|
||||
list[6] = LF_MEMBER, protected, type = 0x14DB, offset = 4
|
||||
member name = 'm_key'
|
||||
list[7] = LF_MEMBER, protected, type = 0x14DB, offset = 20
|
||||
member name = 'm_value'
|
||||
|
||||
0x22d5 : Length = 34, Leaf = 0x1504 LF_CLASS
|
||||
# members = 10, field list type 0x22d4, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x20fb
|
||||
Size = 36, class name = MxVariable, UDT(0x00004041)
|
||||
|
||||
0x3cc2 : Length = 38, Leaf = 0x1507 LF_ENUM
|
||||
# members = 64, type = T_INT4(0074) field list type 0x3cc1
|
||||
NESTED, enum name = JukeBox::JukeBoxScript, UDT(0x00003cc2)
|
||||
|
||||
0x3fab : Length = 10, Leaf = 0x1002 LF_POINTER
|
||||
Pointer (NEAR32), Size: 0
|
||||
Element type : 0x3FAA
|
||||
|
||||
0x405f : Length = 158, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_VFUNCTAB, type = 0x2090
|
||||
list[1] = LF_ONEMETHOD, public, VANILLA, index = 0x176A, name = 'MxCore'
|
||||
list[2] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176A,
|
||||
vfptr offset = 0, name = '~MxCore'
|
||||
list[3] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x176B,
|
||||
vfptr offset = 4, name = 'Notify'
|
||||
list[4] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2087,
|
||||
vfptr offset = 8, name = 'Tickle'
|
||||
list[5] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x202F,
|
||||
vfptr offset = 12, name = 'ClassName'
|
||||
list[6] = LF_ONEMETHOD, public, INTRODUCING VIRTUAL, index = 0x2030,
|
||||
vfptr offset = 16, name = 'IsA'
|
||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x2091, name = 'GetId'
|
||||
list[8] = LF_MEMBER, private, type = T_UINT4(0075), offset = 4
|
||||
member name = 'm_id'
|
||||
|
||||
0x4060 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 9, field list type 0x405f, CONSTRUCTOR,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 8, class name = MxCore, UDT(0x00004060)
|
||||
|
||||
0x4262 : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = 0x3CC2
|
||||
Index type = T_SHORT(0011)
|
||||
length = 24
|
||||
Name =
|
||||
|
||||
0x432f : Length = 14, Leaf = 0x1503 LF_ARRAY
|
||||
Element type = T_INT4(0074)
|
||||
Index type = T_SHORT(0011)
|
||||
length = 12
|
||||
Name =
|
||||
|
||||
0x4db5 : Length = 246, Leaf = 0x1203 LF_FIELDLIST
|
||||
list[0] = LF_BCLASS, public, type = 0x1220, offset = 0
|
||||
list[1] = LF_METHOD, count = 3, list = 0x14E3, name = 'MxString'
|
||||
list[2] = LF_ONEMETHOD, public, VIRTUAL, index = 0x14DE, name = '~MxString'
|
||||
list[3] = LF_METHOD, count = 2, list = 0x14E7, name = 'operator='
|
||||
list[4] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToUpperCase'
|
||||
list[5] = LF_ONEMETHOD, public, VANILLA, index = 0x14DE, name = 'ToLowerCase'
|
||||
list[6] = LF_ONEMETHOD, public, VANILLA, index = 0x14E8, name = 'operator+'
|
||||
list[7] = LF_ONEMETHOD, public, VANILLA, index = 0x14E9, name = 'operator+='
|
||||
list[8] = LF_ONEMETHOD, public, VANILLA, index = 0x14EB, name = 'Compare'
|
||||
list[9] = LF_ONEMETHOD, public, VANILLA, index = 0x14EC, name = 'GetData'
|
||||
list[10] = LF_ONEMETHOD, public, VANILLA, index = 0x4DB4, name = 'GetLength'
|
||||
list[11] = LF_MEMBER, private, type = T_32PRCHAR(0470), offset = 8
|
||||
member name = 'm_data'
|
||||
list[12] = LF_MEMBER, private, type = T_USHORT(0021), offset = 12
|
||||
member name = 'm_length'
|
||||
|
||||
0x4db6 : Length = 30, Leaf = 0x1504 LF_CLASS
|
||||
# members = 16, field list type 0x4db5, CONSTRUCTOR, OVERLOAD,
|
||||
Derivation list type 0x0000, VT shape type 0x1266
|
||||
Size = 16, class name = MxString, UDT(0x00004db6)
|
||||
"""
|
||||
|
||||
|
||||
@pytest.fixture(name="parser")
def types_parser_fixture():
    """Provide a CvdumpTypesParser that has read every line of TEST_LINES."""
    types_parser = CvdumpTypesParser()
    sample_lines = TEST_LINES.split("\n")
    for sample_line in sample_lines:
        types_parser.read_line(sample_line)

    return types_parser
|
||||
|
||||
|
||||
def test_basic_parsing(parser):
    """Check the raw values stored for the MxString class record and
    the number of members stored for its field list."""
    mxstring = parser.keys["0x4db6"]
    expected_values = {
        "type": "LF_CLASS",
        "name": "MxString",
        "udt": "0x4db6",
    }
    for field, expected in expected_values.items():
        assert mxstring[field] == expected

    field_list_members = parser.keys["0x4db5"]["members"]
    assert len(field_list_members) == 2
|
||||
|
||||
|
||||
def test_scalar_types(parser):
    """The scalar_* functions are tested in full in another file.
    This only checks that "T_" type names pass straight through get()."""
    for type_name, expected_size in (("T_CHAR", 1), ("T_32PVOID", 4)):
        assert parser.get(type_name).name is None
        assert parser.get(type_name).size == expected_size
|
||||
|
||||
|
||||
def test_resolve_forward_ref(parser):
    """get() returns the real class details whether or not the requested
    key is a forward reference."""
    # Non-forward ref
    direct = parser.get("0x22d5")
    assert direct.name == "MxVariable"

    # Forward ref
    assert parser.get("0x14db").name == "MxString"
    assert parser.get("0x14db").size == 16
|
||||
|
||||
|
||||
def test_members(parser):
    """get_scalars gives the list of items to compare for a complex type.
    Members of a superclass are expected to be included as well."""
    mxcore_layout = [
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
    ]

    # MxCore field list
    mxcore_members = parser.get_scalars("0x405f")
    assert mxcore_members == mxcore_layout

    # MxCore class id. Should be the same members
    assert mxcore_members == parser.get_scalars("0x4060")

    # MxString field list. Should add inherited members from MxCore
    mxstring_layout = mxcore_layout + [
        (8, "m_data", "T_32PRCHAR"),
        (12, "m_length", "T_USHORT"),
    ]
    assert parser.get_scalars("0x4db5") == mxstring_layout
|
||||
|
||||
|
||||
def test_members_recursive(parser):
    """Nested class members must be unwrapped through the whole
    dependency tree."""
    # Scalars of one MxString, relative to where the MxString starts.
    # m_length is a T_USHORT, so the following member is reached
    # with padding.
    mxstring_layout = (
        (0, "vftable", "T_32PVOID"),
        (4, "m_id", "T_UINT4"),
        (8, "m_data", "T_32PRCHAR"),
        (12, "m_length", "T_USHORT"),
    )

    # MxVariable field list: its own vftable, then two MxString members.
    expected = [(0, "vftable", "T_32PVOID")]
    for member_name, base_offset in (("m_key", 4), ("m_value", 20)):
        for offset, field_name, field_type in mxstring_layout:
            expected.append(
                (base_offset + offset, f"{member_name}.{field_name}", field_type)
            )

    assert parser.get_scalars("0x22d4") == expected
|
||||
|
||||
|
||||
def test_struct(parser):
    """Basic check of the struct.unpack format string built for a type."""
    expected_formats = {
        # MxCore: vftable and uint32. The vftable pointer is read as uint32.
        "0x4060": "<LL",
        # _D3DVECTOR, three floats. Union types should already be removed.
        "0x10e1": "<fff",
        # MxRect32, four signed ints.
        "0x1214": "<llll",
    }
    for type_key, expected in expected_formats.items():
        assert parser.get_format_string(type_key) == expected
|
||||
|
||||
|
||||
def test_struct_padding(parser):
    """The format string should contain padding characters 'x' wherever
    a value is padded to alignment size (probably 4 bytes)"""
    expected_formats = {
        # MxString, padded to 16 bytes.
        "0x4db6": "<LLLHxx",
        # MxVariable, with two MxString members.
        "0x22d5": "<LLLLHxxLLLHxx",
    }
    for type_key, expected in expected_formats.items():
        assert parser.get_format_string(type_key) == expected
|
||||
|
||||
|
||||
def test_array(parser):
    """Members of an LF_ARRAY are generated on the fly from the total
    array size and the size of a single element."""
    # unsigned char[8]: one byte per element
    expected_uchars = [(index, f"[{index}]", "T_UCHAR") for index in range(8)]
    assert parser.get_scalars("0x10e4") == expected_uchars

    # float[4]: four bytes per element
    expected_floats = [(4 * index, f"[{index}]", "T_REAL32") for index in range(4)]
    assert parser.get_scalars("0x103b") == expected_floats
|
||||
|
||||
|
||||
def test_2d_array(parser):
    """Elements of a 2d array should be named the way we expect."""
    # float[4][4]
    float_array = parser.get_scalars("0x103c")
    assert len(float_array) == 16

    # Spot checks: list index -> expected (offset, name, type)
    spot_checks = (
        (0, (0, "[0][0]", "T_REAL32")),
        (1, (4, "[0][1]", "T_REAL32")),
        (4, (16, "[1][0]", "T_REAL32")),
        (-1, (60, "[3][3]", "T_REAL32")),
    )
    for index, expected in spot_checks:
        assert float_array[index] == expected
|
||||
|
||||
|
||||
def test_enum(parser):
    """An LF_ENUM should be equal to a 4-byte int"""
    enum_key = "0x3cc2"
    assert parser.get(enum_key).size == 4
    assert parser.get_scalars(enum_key) == [(0, None, "T_INT4")]

    # Now look at an array of enum, 24 bytes
    enum_array = parser.get_scalars("0x4262")
    element_count = len(enum_array)
    assert element_count == 6  # 24 / 4
    first_element = enum_array[0]
    assert first_element.size == 4
|
||||
|
||||
|
||||
def test_lf_pointer(parser):
    """An LF_POINTER is only a wrapper around the scalar pointer type"""
    pointer_key = "0x3fab"
    assert parser.get(pointer_key).size == 4
    # TODO: should parser.get("0x3fab").is_pointer be True here?

    expected_scalars = [(0, None, "T_32PVOID")]
    assert parser.get_scalars(pointer_key) == expected_scalars
|
||||
|
||||
|
||||
def test_key_not_exist(parser):
    """Looking up a type id that does not exist should raise our exception"""
    missing_key = "0xbeef"
    for lookup in (parser.get, parser.get_scalars):
        with pytest.raises(CvdumpKeyError):
            lookup(missing_key)
|
||||
|
||||
|
||||
def test_broken_forward_ref(parser):
    """An exception is raised when a forward reference cannot be followed"""
    forward_ref_key = "0x1220"
    target_key = "0x4060"

    # The forward reference on MxCore resolves while its target exists
    parser.get(forward_ref_key)

    # Remove the MxCore LF_CLASS that the forward ref points to
    del parser.keys[target_key]

    # Following the forward ref now fails
    with pytest.raises(CvdumpKeyError):
        parser.get(forward_ref_key)
|
||||
|
||||
|
||||
def test_null_forward_ref(parser):
    """A forward ref object that is invalid, with no forward ref id,
    should raise an exception."""
    forward_ref_key = "0x14db"

    # The MxString forward reference resolves to begin with
    parser.get(forward_ref_key)

    # Strip the UDT from the MxString forward ref record
    forward_ref_record = parser.keys[forward_ref_key]
    del forward_ref_record["udt"]

    # The forward reference lookup can no longer be completed
    with pytest.raises(CvdumpIntegrityError):
        parser.get(forward_ref_key)
|
||||
|
||||
|
||||
def test_broken_array_element_ref(parser):
    """An array whose element type record is missing cannot be resolved"""
    array_key = "0x19b1"
    element_key = "0x19b0"

    # The LF_ARRAY of ROIColorAlias resolves to begin with
    parser.get(array_key)

    # Remove the ROIColorAlias record used as the element type
    del parser.keys[element_key]

    # The type reference lookup now fails
    with pytest.raises(CvdumpKeyError):
        parser.get(array_key)
|
||||
|
||||
|
||||
def test_lf_modifier(parser):
    """An LF_MODIFIER should behave as an alias for the type it modifies"""
    # Modifies float
    float_modifier = "0x1028"
    assert parser.get(float_modifier).size == 4
    assert parser.get_scalars(float_modifier) == [(0, None, "T_REAL32")]

    # Modifies MxRect32 via forward ref: same scalars as the class itself
    mxrect = parser.get_scalars("0x1214")
    modified_mxrect = parser.get_scalars("0x11f2")
    assert mxrect == modified_mxrect
|
||||
|
||||
|
||||
def test_union_members(parser):
    """A union somewhere in the dependency list shows up as duplicated
    member offsets and names. The TypeInfo tuple may keep those, but the
    list of ScalarType items should have unique offsets to simplify
    comparison."""

    # D3DVector type with duplicated offsets
    d3dvector = parser.get("0x10e1")
    assert len(d3dvector.members) == 6
    members_at_zero = sum(1 for member in d3dvector.members if member.offset == 0)
    assert members_at_zero == 2

    # Deduplicated comparison list
    vector_items = parser.get_scalars("0x10e1")
    assert len(vector_items) == 3
|
Loading…
Reference in New Issue
Block a user