mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-26 11:25:27 +00:00
[sanitizer] Track architecture and UUID of modules in LoadedModule
When we enumerate loaded modules, we only track the module name and base address, which then has several problems on macOS. Dylibs and executables often have several architecture slices and not storing which architecture/UUID is actually loaded creates problems with symbolication: A file path + offset isn't enough to correctly symbolicate, since the offset can be valid in multiple slices. This is especially common for Haswell+ X86_64 machines, where x86_64h slices are preferred, but if one is not available, a regular x86_64 is loaded instead. But the same issue exists for i386 vs. x86_64 as well. This patch adds tracking of arch and UUID for each LoadedModule. At this point, this information isn't used in reports, but this is the first step. The goal is to correctly identify which slice is loaded in symbolication, and also to output this information in reports so that we can tell which exact slices were loaded in post-mortem analysis. Differential Revision: https://reviews.llvm.org/D26632 llvm-svn: 288537
This commit is contained in:
parent
96be8df23e
commit
3eb98a1318
@ -259,9 +259,18 @@ void LoadedModule::set(const char *module_name, uptr base_address) {
|
||||
base_address_ = base_address;
|
||||
}
|
||||
|
||||
// Initializes the module with a name and base address (by delegating to the
// two-argument overload) and additionally records which architecture slice
// was loaded together with the module's UUID.
//   module_name, base_address - forwarded unchanged to set(name, base).
//   arch - stored in arch_.
//   uuid - source buffer; sizeof(uuid_) bytes (uuid_ is declared as
//          u8[kModuleUUIDSize]) are copied out of it, so it must be at least
//          that large.
void LoadedModule::set(const char *module_name, uptr base_address,
|
||||
ModuleArch arch, u8 uuid[kModuleUUIDSize]) {
|
||||
set(module_name, base_address);
|
||||
arch_ = arch;
|
||||
// sizeof is taken on the member array (not on the decayed pointer parameter),
// so this copies the full UUID.
internal_memcpy(uuid_, uuid, sizeof(uuid_));
|
||||
}
|
||||
|
||||
void LoadedModule::clear() {
|
||||
InternalFree(full_name_);
|
||||
full_name_ = nullptr;
|
||||
arch_ = kModuleArchUnknown;
|
||||
internal_memset(uuid_, 0, kModuleUUIDSize);
|
||||
while (!ranges_.empty()) {
|
||||
AddressRange *r = ranges_.front();
|
||||
ranges_.pop_front();
|
||||
|
@ -646,18 +646,40 @@ uptr InternalLowerBound(const Container &v, uptr first, uptr last,
|
||||
return first;
|
||||
}
|
||||
|
||||
// Identifies the architecture slice of a loaded module.
// kModuleArchUnknown is the value LoadedModule holds after construction and
// after clear(); the remaining enumerators are the values produced by
// ModuleArchFromCpuType() for the CPU types/subtypes it recognizes.
enum ModuleArch {
|
||||
kModuleArchUnknown,
|
||||
kModuleArchI386,
|
||||
kModuleArchX86_64,
|
||||
kModuleArchX86_64H,
|
||||
kModuleArchARMV6,
|
||||
kModuleArchARMV7,
|
||||
kModuleArchARMV7S,
|
||||
kModuleArchARMV7K,
|
||||
kModuleArchARM64
|
||||
};
|
||||
|
||||
// Number of bytes in a module UUID. Used as the length of
// LoadedModule::uuid_ and MemoryMappingLayout::current_uuid_, and as the byte
// count for the memset/memcpy calls that operate on those buffers.
const uptr kModuleUUIDSize = 16;
|
||||
|
||||
// Represents a binary loaded into virtual memory (e.g. this can be an
|
||||
// executable or a shared object).
|
||||
class LoadedModule {
|
||||
public:
|
||||
LoadedModule() : full_name_(nullptr), base_address_(0) { ranges_.clear(); }
|
||||
LoadedModule()
|
||||
: full_name_(nullptr), base_address_(0), arch_(kModuleArchUnknown) {
|
||||
internal_memset(uuid_, 0, kModuleUUIDSize);
|
||||
ranges_.clear();
|
||||
}
|
||||
void set(const char *module_name, uptr base_address);
|
||||
void set(const char *module_name, uptr base_address, ModuleArch arch,
|
||||
u8 uuid[kModuleUUIDSize]);
|
||||
void clear();
|
||||
void addAddressRange(uptr beg, uptr end, bool executable);
|
||||
bool containsAddress(uptr address) const;
|
||||
|
||||
const char *full_name() const { return full_name_; }
|
||||
uptr base_address() const { return base_address_; }
|
||||
ModuleArch arch() const { return arch_; }
|
||||
const u8 *uuid() const { return uuid_; }
|
||||
|
||||
struct AddressRange {
|
||||
AddressRange *next;
|
||||
@ -674,6 +696,8 @@ class LoadedModule {
|
||||
private:
|
||||
char *full_name_; // Owned.
|
||||
uptr base_address_;
|
||||
ModuleArch arch_;
|
||||
u8 uuid_[kModuleUUIDSize];
|
||||
IntrusiveList<AddressRange> ranges_;
|
||||
};
|
||||
|
||||
|
@ -35,8 +35,9 @@ class MemoryMappingLayout {
|
||||
public:
|
||||
explicit MemoryMappingLayout(bool cache_enabled);
|
||||
~MemoryMappingLayout();
|
||||
bool Next(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size, uptr *protection);
|
||||
bool Next(uptr *start, uptr *end, uptr *offset, char filename[],
|
||||
uptr filename_size, uptr *protection, ModuleArch *arch = nullptr,
|
||||
u8 *uuid = nullptr);
|
||||
void Reset();
|
||||
// In some cases, e.g. when running under a sandbox on Linux, ASan is unable
|
||||
// to obtain the memory mappings. It should fall back to pre-cached data
|
||||
@ -65,13 +66,15 @@ class MemoryMappingLayout {
|
||||
static ProcSelfMapsBuff cached_proc_self_maps_;
|
||||
static StaticSpinMutex cache_lock_; // protects cached_proc_self_maps_.
|
||||
# elif SANITIZER_MAC
|
||||
template<u32 kLCSegment, typename SegmentCommand>
|
||||
bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size,
|
||||
template <u32 kLCSegment, typename SegmentCommand>
|
||||
bool NextSegmentLoad(uptr *start, uptr *end, uptr *offset, char filename[],
|
||||
uptr filename_size, ModuleArch *arch, u8 *uuid,
|
||||
uptr *protection);
|
||||
int current_image_;
|
||||
u32 current_magic_;
|
||||
u32 current_filetype_;
|
||||
ModuleArch current_arch_;
|
||||
u8 current_uuid_[kModuleUUIDSize];
|
||||
int current_load_cmd_count_;
|
||||
char *current_load_cmd_addr_;
|
||||
# endif
|
||||
|
@ -50,7 +50,9 @@ void ReadProcMaps(ProcSelfMapsBuff *proc_maps) {
|
||||
|
||||
bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size,
|
||||
uptr *protection) {
|
||||
uptr *protection, ModuleArch *arch, u8 *uuid) {
|
||||
CHECK(!arch && "not implemented");
|
||||
CHECK(!uuid && "not implemented");
|
||||
char *last = proc_self_maps_.data + proc_self_maps_.len;
|
||||
if (current_ >= last) return false;
|
||||
uptr dummy;
|
||||
|
@ -28,7 +28,9 @@ static bool IsOneOf(char c, char c1, char c2) {
|
||||
|
||||
bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size,
|
||||
uptr *protection) {
|
||||
uptr *protection, ModuleArch *arch, u8 *uuid) {
|
||||
CHECK(!arch && "not implemented");
|
||||
CHECK(!uuid && "not implemented");
|
||||
char *last = proc_self_maps_.data + proc_self_maps_.len;
|
||||
if (current_ >= last) return false;
|
||||
uptr dummy;
|
||||
|
@ -53,6 +53,8 @@ void MemoryMappingLayout::Reset() {
|
||||
current_load_cmd_addr_ = 0;
|
||||
current_magic_ = 0;
|
||||
current_filetype_ = 0;
|
||||
current_arch_ = kModuleArchUnknown;
|
||||
internal_memset(current_uuid_, 0, kModuleUUIDSize);
|
||||
}
|
||||
|
||||
// static
|
||||
@ -71,11 +73,12 @@ void MemoryMappingLayout::LoadFromCache() {
|
||||
// and returns the start and end addresses and file offset of the corresponding
|
||||
// segment.
|
||||
// Note that the segment addresses are not necessarily sorted.
|
||||
template<u32 kLCSegment, typename SegmentCommand>
|
||||
bool MemoryMappingLayout::NextSegmentLoad(
|
||||
uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size, uptr *protection) {
|
||||
const char* lc = current_load_cmd_addr_;
|
||||
template <u32 kLCSegment, typename SegmentCommand>
|
||||
bool MemoryMappingLayout::NextSegmentLoad(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size,
|
||||
ModuleArch *arch, u8 *uuid,
|
||||
uptr *protection) {
|
||||
const char *lc = current_load_cmd_addr_;
|
||||
current_load_cmd_addr_ += ((const load_command *)lc)->cmdsize;
|
||||
if (((const load_command *)lc)->cmd == kLCSegment) {
|
||||
const sptr dlloff = _dyld_get_image_vmaddr_slide(current_image_);
|
||||
@ -97,14 +100,61 @@ bool MemoryMappingLayout::NextSegmentLoad(
|
||||
internal_strncpy(filename, _dyld_get_image_name(current_image_),
|
||||
filename_size);
|
||||
}
|
||||
if (arch) {
|
||||
*arch = current_arch_;
|
||||
}
|
||||
if (uuid) {
|
||||
internal_memcpy(uuid, current_uuid_, kModuleUUIDSize);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Translates the cputype/cpusubtype pair taken from a Mach-O header into the
// corresponding ModuleArch value.
//   cputype    - selects the architecture family (i386, x86_64, ARM, ARM64).
//   cpusubtype - distinguishes slices within x86_64 and ARM; ignored for
//                i386 and ARM64.
// Returns the matching ModuleArch. Unrecognized CPU types, and unrecognized
// x86_64/ARM subtypes, trigger CHECK(0 && ...); the kModuleArchUnknown return
// that follows each CHECK is only reached if CHECK returns.
ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
|
||||
// Drop the bits covered by CPU_SUBTYPE_MASK so only the plain subtype value
// takes part in the comparisons below.
cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
|
||||
switch (cputype) {
|
||||
case CPU_TYPE_I386:
|
||||
return kModuleArchI386;
|
||||
case CPU_TYPE_X86_64:
|
||||
if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
|
||||
if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
|
||||
CHECK(0 && "Invalid subtype of x86_64");
|
||||
return kModuleArchUnknown;
|
||||
case CPU_TYPE_ARM:
|
||||
if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
|
||||
if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
|
||||
if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
|
||||
if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
|
||||
CHECK(0 && "Invalid subtype of ARM");
|
||||
return kModuleArchUnknown;
|
||||
case CPU_TYPE_ARM64:
|
||||
return kModuleArchARM64;
|
||||
default:
|
||||
CHECK(0 && "Invalid CPU type");
|
||||
return kModuleArchUnknown;
|
||||
}
|
||||
}
|
||||
|
||||
// Scans a sequence of load commands, starting at first_lc, for an LC_UUID
// command and copies its kModuleUUIDSize-byte UUID into uuid_output.
//   first_lc    - load command at which the scan starts.
//   uuid_output - destination buffer of at least kModuleUUIDSize bytes. It is
//                 written only when an LC_UUID command is found; otherwise
//                 its previous contents are left untouched.
// NOTE(review): the walk has no command-count bound. It stops only on a
// command whose cmd field is 0 or on LC_UUID, so it relies on the memory after
// the last load command reading as cmd == 0 -- confirm against the caller.
// NOTE(review): a command with cmdsize == 0 (and cmd neither 0 nor LC_UUID)
// would never advance current_lc and would loop forever.
static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
|
||||
const load_command *current_lc = first_lc;
|
||||
while (1) {
|
||||
// A zero command id is treated as the end of the command list.
if (current_lc->cmd == 0) return;
|
||||
if (current_lc->cmd == LC_UUID) {
|
||||
const uuid_command *uuid_lc = (const uuid_command *)current_lc;
|
||||
const uint8_t *uuid = &uuid_lc->uuid[0];
|
||||
internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
|
||||
return;
|
||||
}
|
||||
|
||||
// Step to the next command: each command records its own size in bytes.
current_lc =
|
||||
(const load_command *)(((char *)current_lc) + current_lc->cmdsize);
|
||||
}
|
||||
}
|
||||
|
||||
bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
|
||||
char filename[], uptr filename_size,
|
||||
uptr *protection) {
|
||||
uptr *protection, ModuleArch *arch, u8 *uuid) {
|
||||
for (; current_image_ >= 0; current_image_--) {
|
||||
const mach_header* hdr = _dyld_get_image_header(current_image_);
|
||||
if (!hdr) continue;
|
||||
@ -113,6 +163,7 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
|
||||
current_load_cmd_count_ = hdr->ncmds;
|
||||
current_magic_ = hdr->magic;
|
||||
current_filetype_ = hdr->filetype;
|
||||
current_arch_ = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
|
||||
switch (current_magic_) {
|
||||
#ifdef MH_MAGIC_64
|
||||
case MH_MAGIC_64: {
|
||||
@ -130,20 +181,24 @@ bool MemoryMappingLayout::Next(uptr *start, uptr *end, uptr *offset,
|
||||
}
|
||||
}
|
||||
|
||||
FindUUID((const load_command *)current_load_cmd_addr_, &current_uuid_[0]);
|
||||
|
||||
for (; current_load_cmd_count_ >= 0; current_load_cmd_count_--) {
|
||||
switch (current_magic_) {
|
||||
// current_magic_ may be only one of MH_MAGIC, MH_MAGIC_64.
|
||||
#ifdef MH_MAGIC_64
|
||||
case MH_MAGIC_64: {
|
||||
if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
|
||||
start, end, offset, filename, filename_size, protection))
|
||||
start, end, offset, filename, filename_size, arch, uuid,
|
||||
protection))
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
case MH_MAGIC: {
|
||||
if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
|
||||
start, end, offset, filename, filename_size, protection))
|
||||
start, end, offset, filename, filename_size, arch, uuid,
|
||||
protection))
|
||||
return true;
|
||||
break;
|
||||
}
|
||||
@ -159,9 +214,11 @@ void MemoryMappingLayout::DumpListOfModules(
|
||||
InternalMmapVector<LoadedModule> *modules) {
|
||||
Reset();
|
||||
uptr cur_beg, cur_end, prot;
|
||||
ModuleArch cur_arch;
|
||||
u8 cur_uuid[kModuleUUIDSize];
|
||||
InternalScopedString module_name(kMaxPathLength);
|
||||
for (uptr i = 0; Next(&cur_beg, &cur_end, 0, module_name.data(),
|
||||
module_name.size(), &prot);
|
||||
module_name.size(), &prot, &cur_arch, &cur_uuid[0]);
|
||||
i++) {
|
||||
const char *cur_name = module_name.data();
|
||||
if (cur_name[0] == '\0')
|
||||
@ -173,7 +230,7 @@ void MemoryMappingLayout::DumpListOfModules(
|
||||
} else {
|
||||
modules->push_back(LoadedModule());
|
||||
cur_module = &modules->back();
|
||||
cur_module->set(cur_name, cur_beg);
|
||||
cur_module->set(cur_name, cur_beg, cur_arch, cur_uuid);
|
||||
}
|
||||
cur_module->addAddressRange(cur_beg, cur_end, prot & kProtectionExecute);
|
||||
}
|
||||
|
@ -52,5 +52,26 @@ TEST(MemoryMappingLayout, DumpListOfModules) {
|
||||
EXPECT_TRUE(found);
|
||||
}
|
||||
|
||||
// Verifies that DumpListOfModules() fills in the architecture and UUID of
// every module it reports. The body only runs when SANITIZER_MAC is non-zero;
// elsewhere the test is a no-op.
TEST(MemoryMapping, LoadedModuleArchAndUUID) {
|
||||
if (SANITIZER_MAC) {
|
||||
MemoryMappingLayout memory_mapping(false);
|
||||
const uptr kMaxModules = 100;
|
||||
InternalMmapVector<LoadedModule> modules(kMaxModules);
|
||||
memory_mapping.DumpListOfModules(&modules);
|
||||
for (uptr i = 0; i < modules.size(); ++i) {
|
||||
ModuleArch arch = modules[i].arch();
|
||||
// Darwin unit tests are only run on i386/x86_64/x86_64h.
|
||||
if (SANITIZER_WORDSIZE == 32) {
|
||||
EXPECT_EQ(arch, kModuleArchI386);
|
||||
} else if (SANITIZER_WORDSIZE == 64) {
|
||||
EXPECT_TRUE(arch == kModuleArchX86_64 || arch == kModuleArchX86_64H);
|
||||
}
|
||||
const u8 *uuid = modules[i].uuid();
|
||||
// An all-zero UUID is what LoadedModule holds by default, so a non-zero
// value shows that a UUID was actually recorded for this module.
u8 null_uuid[kModuleUUIDSize] = {0};
|
||||
EXPECT_NE(memcmp(null_uuid, uuid, kModuleUUIDSize), 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __sanitizer
|
||||
#endif // !defined(_WIN32)
|
||||
|
Loading…
x
Reference in New Issue
Block a user