[XRay] Support AArch64 in compiler-rt

This patch adds XRay support in compiler-rt for AArch64 targets.
This patch is one of a series:

LLVM: https://reviews.llvm.org/D26412
Clang: https://reviews.llvm.org/D26415

Author: rSerge

Reviewers: rengolin, dberris

Subscribers: aemerson, mgorny, llvm-commits, iid_iunknown

Differential Revision: https://reviews.llvm.org/D26413

llvm-svn: 287517
This commit is contained in:
Dean Michael Berris 2016-11-21 03:20:43 +00:00
parent 31761f300d
commit bad8f0feb4
7 changed files with 212 additions and 4 deletions

View File

@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64})
if(APPLE)
include(CompilerRTDarwinUtils)

View File

@ -32,6 +32,13 @@ enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
// (function entry, function exit, etc.). See the enum
// XRayEntryType for more details.
//
// The user handler must correctly handle spurious calls made after this
// handler is removed or replaced with another handler, because it would be too
// costly for the XRay runtime to avoid spurious calls.
// To prevent circular calling, the handler function itself and all its
// direct and indirect callees must not be instrumented with XRay, which can be
// achieved by marking them all with: __attribute__((xray_never_instrument))
//
// Returns 1 on success, 0 on error.
extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType));

View File

@ -19,6 +19,11 @@ set(arm_SOURCES
set(armhf_SOURCES ${arm_SOURCES})
set(aarch64_SOURCES
xray_AArch64.cc
xray_trampoline_AArch64.S
${XRAY_SOURCES})
include_directories(..)
include_directories(../../include)

View File

@ -0,0 +1,105 @@
//===-- xray_AArch64.cc -----------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Implementation of AArch64-specific routines (64-bit).
//
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_common.h"
#include "xray_defs.h"
#include "xray_interface_internal.h"
#include <atomic>
#include <cassert>
namespace __xray {
// The machine codes for some instructions used in runtime patching.
// Raw A64 instruction encodings written into a sled by the runtime patcher.
// Every enumerator is one 32-bit instruction word; the enumerators are listed
// in the order in which they appear in a patched sled, followed by the branch
// used by an unpatched sled.
enum class PatchOpcodes : uint32_t {
  // STP X0, X30, [SP, #-16]!
  PO_StpX0X30SP_m16e = 0xA9BF7BE0,
  // LDR W0, #12
  PO_LdrW0_12 = 0x18000060,
  // LDR X16, #12
  PO_LdrX16_12 = 0x58000070,
  // BLR X16
  PO_BlrX16 = 0xD63F0200,
  // LDP X0, X30, [SP], #16
  PO_LdpX0X30SP_16 = 0xA8C17BE0,
  // B #32
  PO_B32 = 0x14000008
};
// Patches (Enable == true) or unpatches (Enable == false) one XRay sled.
//
//   Enable      - whether to install the runtime patch or restore the jump
//                 over the sled.
//   FuncId      - function ID embedded into the sled as 32 bits of data.
//   Sled        - sled descriptor; only Sled.Address is used here.
//   TracingHook - trampoline whose address is embedded into the sled.
//
// Always returns true.
inline static bool patchSled(const bool Enable, const uint32_t FuncId,
                             const XRaySledEntry &Sled,
                             void (*TracingHook)()) XRAY_NEVER_INSTRUMENT {
  // When |Enable| == true,
  // We replace the following compile-time stub (sled):
  //
  // xray_sled_n:
  //   B #32
  //   7 NOPs (28 bytes)
  //
  // With the following runtime patch:
  //
  // xray_sled_n:
  //   STP X0, X30, [SP, #-16]! ; push X0 and the link register
  //   LDR W0, #12 ; W0 := function ID
  //   LDR X16,#12 ; X16 := address of the trampoline
  //   BLR X16
  //   ;DATA: 32 bits of function ID
  //   ;DATA: lower 32 bits of the address of the trampoline
  //   ;DATA: higher 32 bits of the address of the trampoline
  //   LDP X0, X30, [SP], #16 ; pop X0 and the link register
  //
  // Replacement of the first 4-byte instruction should be the last and atomic
  // operation, so that the user code which reaches the sled concurrently
  // either jumps over the whole sled, or executes the whole sled when the
  // latter is ready.
  //
  // When |Enable|==false, we set back the first instruction in the sled to be
  //   B #32
  //
  // Stores into the sled are ordinary data writes, so after modifying code we
  // explicitly synchronise the instruction cache; otherwise a core may keep
  // fetching stale instruction words.
  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
  if (Enable) {
    uint32_t *CurAddress = FirstAddress + 1;
    *CurAddress = uint32_t(PatchOpcodes::PO_LdrW0_12);
    CurAddress++;
    *CurAddress = uint32_t(PatchOpcodes::PO_LdrX16_12);
    CurAddress++;
    *CurAddress = uint32_t(PatchOpcodes::PO_BlrX16);
    CurAddress++;
    *CurAddress = FuncId;
    CurAddress++;
    // The trampoline address occupies two consecutive 32-bit slots.
    *reinterpret_cast<void (**)()>(CurAddress) = TracingHook;
    CurAddress += 2;
    *CurAddress = uint32_t(PatchOpcodes::PO_LdpX0X30SP_16);
    CurAddress++;
    // Make the tail of the sled visible to instruction fetch before the first
    // instruction is switched from the branch-over to the STP.
    __builtin___clear_cache(reinterpret_cast<char *>(FirstAddress + 1),
                            reinterpret_cast<char *>(CurAddress));
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
        uint32_t(PatchOpcodes::PO_StpX0X30SP_m16e), std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
        uint32_t(PatchOpcodes::PO_B32), std::memory_order_release);
  }
  // Publish the rewritten first instruction to the instruction stream.
  __builtin___clear_cache(reinterpret_cast<char *>(FirstAddress),
                          reinterpret_cast<char *>(FirstAddress + 1));
  return true;
}
// Patches or unpatches a function-entry sled; a patched sled calls the
// __xray_FunctionEntry trampoline. Returns the result of patchSled().
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  void (*const EntryTrampoline)() = __xray_FunctionEntry;
  return patchSled(Enable, FuncId, Sled, EntryTrampoline);
}
// Patches or unpatches a function-exit sled; a patched sled calls the
// __xray_FunctionExit trampoline. Returns the result of patchSled().
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  void (*const ExitTrampoline)() = __xray_FunctionExit;
  return patchSled(Enable, FuncId, Sled, ExitTrampoline);
}
// Patches or unpatches a tail-exit sled. Tail exits currently share the
// ordinary exit trampoline (__xray_FunctionExit). Returns the result of
// patchSled().
bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                           const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT {
  // FIXME: In the future we'd need to distinguish between non-tail exits and
  // tail exits for better information preservation.
  void (*const TailExitTrampoline)() = __xray_FunctionExit;
  return patchSled(Enable, FuncId, Sled, TailExitTrampoline);
}
} // namespace __xray

View File

@ -27,7 +27,7 @@
#if defined(__x86_64__)
#include <x86intrin.h>
#elif defined(__arm__)
#elif defined(__arm__) || defined(__aarch64__)
static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000;
#else
#error "Unsupported CPU Architecture"
@ -195,7 +195,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
} else {
Report("Unable to determine CPU frequency for TSC accounting.");
}
#elif defined(__arm__)
#elif defined(__arm__) || defined(__aarch64__)
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
// not have a constant frequency like TSC on x86(_64), it may go faster
// or slower depending on CPU turbo or power saving mode. Furthermore,
@ -243,7 +243,7 @@ void __xray_InMemoryRawLog(int32_t FuncId,
R.TSC = __rdtscp(&CPU);
R.CPU = CPU;
}
#elif defined(__arm__)
#elif defined(__arm__) || defined(__aarch64__)
{
timespec TS;
int result = clock_gettime(CLOCK_REALTIME, &TS);

View File

@ -33,6 +33,8 @@ namespace __xray {
static const int16_t cSledLength = 12;
#elif defined(__arm__)
static const int16_t cSledLength = 28;
#elif defined(__aarch64__)
static const int16_t cSledLength = 32;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

View File

@ -0,0 +1,89 @@
.text
/* The variable containing the handler function pointer */
.global _ZN6__xray19XRayPatchedFunctionE
/* Word-aligned function entry point */
.p2align 2
/* Let C/C++ see the symbol */
.global __xray_FunctionEntry
.type __xray_FunctionEntry, %function
/* In C++ it is void extern "C" __xray_FunctionEntry(uint32_t FuncId) with
FuncId passed in W0 register. */
__xray_FunctionEntry:
    /* Entry trampoline. Reached via BLR X16 from a patched sled with the
       function ID in W0. X0 and the original X30 are pushed by the sled
       itself and popped by it again after this trampoline returns. */
    /* Move the return address beyond the end of sled data. The 12 bytes of
       data are inserted in the code of the runtime patch, between the call
       instruction and the instruction returned into. The data contains 32
       bits of instrumented function ID and 64 bits of the address of
       the current trampoline. */
    ADD X30, X30, #12
    /* Push the registers which may be modified by the handler function */
    STP X1, X2, [SP, #-16]!
    STP X3, X4, [SP, #-16]!
    STP X5, X6, [SP, #-16]!
    STP X7, X30, [SP, #-16]!
    /* X8 is the indirect result location register: at function entry it may
       hold the address where the instrumented function must store its
       returned aggregate, so it has to survive the handler call. It gets a
       full 16-byte slot to keep SP 16-byte aligned. */
    STR X8, [SP, #-16]!
    STP Q0, Q1, [SP, #-32]!
    STP Q2, Q3, [SP, #-32]!
    STP Q4, Q5, [SP, #-32]!
    STP Q6, Q7, [SP, #-32]!
    /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
    LDR X1, =_ZN6__xray19XRayPatchedFunctionE
    /* Load the handler function pointer into X2 */
    LDR X2, [X1]
    /* Handler address is nullptr if handler is not set */
    CMP X2, #0
    BEQ FunctionEntry_restore
    /* Function ID is already in W0 (the first parameter).
       X1=0 means that we are tracing an entry event */
    MOV X1, #0
    /* Call the handler with 2 parameters in W0 and X1 */
    BLR X2
FunctionEntry_restore:
    /* Pop the saved registers in the reverse order of the pushes above */
    LDP Q6, Q7, [SP], #32
    LDP Q4, Q5, [SP], #32
    LDP Q2, Q3, [SP], #32
    LDP Q0, Q1, [SP], #32
    LDR X8, [SP], #16
    LDP X7, X30, [SP], #16
    LDP X5, X6, [SP], #16
    LDP X3, X4, [SP], #16
    LDP X1, X2, [SP], #16
    RET
/* Word-aligned function entry point */
.p2align 2
/* Let C/C++ see the symbol */
.global __xray_FunctionExit
.type __xray_FunctionExit, %function
/* In C++ it is void extern "C" __xray_FunctionExit(uint32_t FuncId) with
FuncId passed in W0 register. */
__xray_FunctionExit:
    /* Exit trampoline. Reached via BLR X16 from a patched sled with the
       function ID in W0. X0 and the original X30 are pushed by the sled
       itself and popped by it again after this trampoline returns. */
    /* Move the return address beyond the end of sled data. The 12 bytes of
       data are inserted in the code of the runtime patch, between the call
       instruction and the instruction returned into. The data contains 32
       bits of instrumented function ID and 64 bits of the address of
       the current trampoline. */
    ADD X30, X30, #12
    /* Push the registers which may be modified by the handler function */
    STP X1, X2, [SP, #-16]!
    STP X3, X4, [SP, #-16]!
    STP X5, X6, [SP, #-16]!
    STP X7, X30, [SP, #-16]!
    /* Floating-point/vector results may occupy up to four registers (V0-V3)
       when the instrumented function returns a homogeneous aggregate, so all
       four are preserved around the handler call, not only Q0. */
    STP Q0, Q1, [SP, #-32]!
    STP Q2, Q3, [SP, #-32]!
    /* Load the address of _ZN6__xray19XRayPatchedFunctionE into X1 */
    LDR X1, =_ZN6__xray19XRayPatchedFunctionE
    /* Load the handler function pointer into X2 */
    LDR X2, [X1]
    /* Handler address is nullptr if handler is not set */
    CMP X2, #0
    BEQ FunctionExit_restore
    /* Function ID is already in W0 (the first parameter).
       X1=1 means that we are tracing an exit event */
    MOV X1, #1
    /* Call the handler with 2 parameters in W0 and X1 */
    BLR X2
FunctionExit_restore:
    /* Pop the saved registers in the reverse order of the pushes above */
    LDP Q2, Q3, [SP], #32
    LDP Q0, Q1, [SP], #32
    LDP X7, X30, [SP], #16
    LDP X5, X6, [SP], #16
    LDP X3, X4, [SP], #16
    LDP X1, X2, [SP], #16
    RET