mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-11-24 14:20:17 +00:00
[XRay] Support AArch64 in compiler-rt
This patch adds XRay support in compiler-rt for AArch64 targets. This patch is one of a series: LLVM: https://reviews.llvm.org/D26412 Clang: https://reviews.llvm.org/D26415 Author: rSerge Reviewers: rengolin, dberris Subscribers: aemerson, mgorny, llvm-commits, iid_iunknown Differential Revision: https://reviews.llvm.org/D26413 llvm-svn: 287517
This commit is contained in:
parent
31761f300d
commit
bad8f0feb4
@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
|
||||
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
|
||||
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
|
||||
set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
|
||||
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
|
||||
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
|
||||
|
||||
if(APPLE)
|
||||
include(CompilerRTDarwinUtils)
|
||||
|
@ -32,6 +32,13 @@ enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
|
||||
// (function entry, function exit, etc.). See the enum
|
||||
// XRayEntryType for more details.
|
||||
//
|
||||
// The user handler must correctly handle spurious calls after this handler is
|
||||
// removed or replaced with another handler, because it would be too costly for
|
||||
// XRay runtime to avoid spurious calls.
|
||||
// To prevent circular calling, the handler function itself and all its
|
||||
// direct and indirect callees must not be instrumented with XRay, which can be
|
||||
// achieved by marking them all with: __attribute__((xray_never_instrument))
|
||||
//
|
||||
// Returns 1 on success, 0 on error.
|
||||
extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));
|
||||
|
||||
|
@ -19,6 +19,11 @@ set(arm_SOURCES
|
||||
|
||||
set(armhf_SOURCES ${arm_SOURCES})
|
||||
|
||||
set(aarch64_SOURCES
|
||||
xray_AArch64.cc
|
||||
xray_trampoline_AArch64.S
|
||||
${XRAY_SOURCES})
|
||||
|
||||
include_directories(..)
|
||||
include_directories(../../include)
|
||||
|
||||
|
105
compiler-rt/lib/xray/xray_AArch64.cc
Normal file
105
compiler-rt/lib/xray/xray_AArch64.cc
Normal file
@ -0,0 +1,105 @@
|
||||
//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a part of XRay, a dynamic runtime instrumentation system.
|
||||
//
|
||||
// Implementation of AArch64-specific routines (64-bit).
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
#include "sanitizer_common/sanitizer_common.h"
|
||||
#include "xray_defs.h"
|
||||
#include "xray_interface_internal.h"
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
|
||||
namespace __xray {
|
||||
|
||||
// The machine codes for some instructions used in runtime patching.
|
||||
// Raw 32-bit AArch64 instruction encodings written into a sled by the runtime
// patcher. PO_B32 is the instruction of the unpatched sled; the remaining
// entries are listed in the order in which they appear in a patched sled.
enum class PatchOpcodes : uint32_t {
  // Unpatched sled: branch over the whole 32-byte sled.
  PO_B32 = 0x14000008, // B #32

  // Patched sled, in execution order.
  PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]!
  PO_LdrW0_12 = 0x18000060,        // LDR W0, #12
  PO_LdrX16_12 = 0x58000070,       // LDR X16, #12
  PO_BlrX16 = 0xD63F0200,          // BLR X16
  PO_LdpX0X30SP_16 = 0xA8C17BE0    // LDP X0, X30, [SP], #16
};
|
||||
|
||||
inline static bool patchSled(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled,
|
||||
void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
|
||||
// When |Enable| == true,
|
||||
// We replace the following compile-time stub (sled):
|
||||
//
|
||||
// xray_sled_n:
|
||||
// B #32
|
||||
// 7 NOPs (24 bytes)
|
||||
//
|
||||
// With the following runtime patch:
|
||||
//
|
||||
// xray_sled_n:
|
||||
// STP X0, X30, [SP, #-16]! ; PUSH {r0, lr}
|
||||
// LDR W0, #12 ; W0 := function ID
|
||||
// LDR X16,#12 ; X16 := address of the trampoline
|
||||
// BLR X16
|
||||
// ;DATA: 32 bits of function ID
|
||||
// ;DATA: lower 32 bits of the address of the trampoline
|
||||
// ;DATA: higher 32 bits of the address of the trampoline
|
||||
// LDP X0, X30, [SP], #16 ; POP {r0, lr}
|
||||
//
|
||||
// Replacement of the first 4-byte instruction should be the last and atomic
|
||||
// operation, so that the user code which reaches the sled concurrently
|
||||
// either jumps over the whole sled, or executes the whole sled when the
|
||||
// latter is ready.
|
||||
//
|
||||
// When |Enable|==false, we set back the first instruction in the sled to be
|
||||
// B #32
|
||||
|
||||
uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
|
||||
if (Enable) {
|
||||
uint32_t *CurAddress = FirstAddress + 1;
|
||||
*CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
|
||||
CurAddress++;
|
||||
*CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
|
||||
CurAddress++;
|
||||
*CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
|
||||
CurAddress++;
|
||||
*CurAddress = FuncId;
|
||||
CurAddress++;
|
||||
*reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
|
||||
CurAddress += 2;
|
||||
*CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
|
||||
uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
|
||||
} else {
|
||||
std::atomic_store_explicit(
|
||||
reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
|
||||
uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
|
||||
}
|
||||
|
||||
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
|
||||
}
|
||||
|
||||
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
|
||||
const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
|
||||
// FIXME: In the future we'd need to distinguish between non-tail exits and
|
||||
// tail exits for better information preservation.
|
||||
return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
|
||||
}
|
||||
|
||||
} // namespace __xray
|
@ -27,7 +27,7 @@
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#include <x86intrin.h>
|
||||
#elif defined(__arm__)
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;
|
||||
#else
|
||||
#error "Unsupported CPU Architecture"
|
||||
@ -195,7 +195,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
|
||||
} else {
|
||||
Report("Unable to determine CPU frequency for TSC accounting.");
|
||||
}
|
||||
#elif defined(__arm__)
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
|
||||
// not have a constant frequency like TSC on x86(_64), it may go faster
|
||||
// or slower depending on CPU turbo or power saving mode. Furthermore,
|
||||
@ -243,7 +243,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
|
||||
R.TSC = __rdtscp(&CPU);
|
||||
R.CPU = CPU;
|
||||
}
|
||||
#elif defined(__arm__)
|
||||
#elif defined(__arm__) || defined(__aarch64__)
|
||||
{
|
||||
timespec TS;
|
||||
int result = clock_gettime(CLOCK_REALTIME, &TS);
|
||||
|
@ -33,6 +33,8 @@ namespace __xray {
|
||||
static const int16_t cSledLength = 12;
|
||||
#elif defined(__arm__)
|
||||
static const int16_t cSledLength = 28;
|
||||
#elif defined(__aarch64__)
|
||||
static const int16_t cSledLength = 32;
|
||||
#else
|
||||
#error "Unsupported CPU Architecture"
|
||||
#endif /* CPU architecture */
|
||||
|
89
compiler-rt/lib/xray/xray_trampoline_AArch64.S
Normal file
89
compiler-rt/lib/xray/xray_trampoline_AArch64.S
Normal file
@ -0,0 +1,89 @@
|
||||
.text
|
||||
/* The variable containing the handler function pointer */
|
||||
.global _ZN6__xray19XRayPatchedFunctionE
|
||||
/* Word-aligned function entry point */
|
||||
.p2align 2
|
||||
/* Let C/C++ see the symbol */
|
||||
.global __xray_FunctionEntry
|
||||
.type __xray_FunctionEntry, %function
|
||||
/* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
|
||||
FuncId passed in W0 register. */
|
||||
__xray_FunctionEntry:
	/* Move the return address beyond the end of sled data. The 12 bytes of
	     data are inserted in the code of the runtime patch, between the call
	     instruction and the instruction returned into. The data contains 32
	     bits of instrumented function ID and 64 bits of the address of
	     the current trampoline. */
	ADD X30, X30, #12
	/* Push the registers which may be modified by the handler function */
	STP X1, X2, [SP, #-16]!
	STP X3, X4, [SP, #-16]!
	STP X5, X6, [SP, #-16]!
	STP X7, X30, [SP, #-16]!
	STP Q0, Q1, [SP, #-32]!
	STP Q2, Q3, [SP, #-32]!
	STP Q4, Q5, [SP, #-32]!
	STP Q6, Q7, [SP, #-32]!
	/* X8 is the indirect result location register: the instrumented function
	     may need it to store a result returned in memory, and the handler
	     is free to clobber it. A full 16-byte slot is used to keep SP
	     16-byte aligned. */
	STR X8, [SP, #-16]!
	/* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
	LDR X1, =_ZN6__xray19XRayPatchedFunctionE
	/* Load the handler function pointer into X2 */
	LDR X2, [X1]
	/* Handler address is nullptr if handler is not set */
	CMP X2, #0
	BEQ FunctionEntry_restore
	/* Function ID is already in W0 (the first parameter).
	     X1=0 means that we are tracing an entry event */
	MOV X1, #0
	/* Call the handler with 2 parameters in W0 and X1 */
	BLR X2
FunctionEntry_restore:
	/* Pop the saved registers in the reverse order of the pushes */
	LDR X8, [SP], #16
	LDP Q6, Q7, [SP], #32
	LDP Q4, Q5, [SP], #32
	LDP Q2, Q3, [SP], #32
	LDP Q0, Q1, [SP], #32
	LDP X7, X30, [SP], #16
	LDP X5, X6, [SP], #16
	LDP X3, X4, [SP], #16
	LDP X1, X2, [SP], #16
	RET
|
||||
|
||||
/* Word-aligned function entry point */
|
||||
.p2align 2
|
||||
/* Let C/C++ see the symbol */
|
||||
.global __xray_FunctionExit
|
||||
.type __xray_FunctionExit, %function
|
||||
/* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with
|
||||
FuncId passed in W0 register. */
|
||||
__xray_FunctionExit:
	/* Move the return address beyond the end of sled data. The 12 bytes of
	     data are inserted in the code of the runtime patch, between the call
	     instruction and the instruction returned into. The data contains 32
	     bits of instrumented function ID and 64 bits of the address of
	     the current trampoline. */
	ADD X30, X30, #12
	/* Push the registers which may be modified by the handler function */
	STP X1, X2, [SP, #-16]!
	STP X3, X4, [SP, #-16]!
	STP X5, X6, [SP, #-16]!
	STP X7, X30, [SP, #-16]!
	/* Floating-point/vector results may occupy up to four registers (a
	     homogeneous aggregate is returned in V0-V3), so all of Q0-Q3 must
	     survive the handler call, not just Q0. */
	STP Q0, Q1, [SP, #-32]!
	STP Q2, Q3, [SP, #-32]!
	/* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
	LDR X1, =_ZN6__xray19XRayPatchedFunctionE
	/* Load the handler function pointer into X2 */
	LDR X2, [X1]
	/* Handler address is nullptr if handler is not set */
	CMP X2, #0
	BEQ FunctionExit_restore
	/* Function ID is already in W0 (the first parameter).
	     X1=1 means that we are tracing an exit event */
	MOV X1, #1
	/* Call the handler with 2 parameters in W0 and X1 */
	BLR X2
FunctionExit_restore:
	/* Pop the saved registers in the reverse order of the pushes */
	LDP Q2, Q3, [SP], #32
	LDP Q0, Q1, [SP], #32
	LDP X7, X30, [SP], #16
	LDP X5, X6, [SP], #16
	LDP X3, X4, [SP], #16
	LDP X1, X2, [SP], #16
	RET
|
Loading…
Reference in New Issue
Block a user