[XRay][compiler-rt+llvm] Update XRay register stashing semantics

Summary:
This change expands the set of registers stashed by the entry and
`__xray_CustomEvent` trampolines.

We've found that `__xray_CustomEvent` trampoline calls can show up in
situations where the scratch registers are in use, and since we don't
typically want to affect the code-gen around the disabled
`__xray_customevent(...)` intrinsic calls, we need to save and restore the
state of even the scratch registers when handling these custom events.
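
To make the scenario concrete, here is a minimal sketch (not part of this
change) of the user-level path that ends in `__xray_CustomEvent`. It assumes
the public API from compiler-rt's `xray/xray_interface.h`; the handler name
and build flags are illustrative:

// Sketch: build with `clang++ -fxray-instrument sketch.cc`; treat the exact
// signatures as assumptions based on xray/xray_interface.h.
#include <cstdio>
#include "xray/xray_interface.h"

// Illustrative handler; the runtime reaches it through the
// _ZN6__xray22XRayPatchedCustomEventE pointer the trampoline loads below.
void handleEvent(void *Buf, std::size_t Size) {
  std::fprintf(stderr, "custom event: %.*s\n", static_cast<int>(Size),
               static_cast<const char *>(Buf));
}

int main() {
  __xray_set_customevent_handler(handleEvent);
  __xray_patch(); // patch the sleds so the trampoline is actually entered
  const char Msg[] = "hello";
  // Lowered to a PATCHABLE_EVENT_CALL pseudo; once patched, this enters
  // __xray_CustomEvent, which must not clobber any of the caller's state,
  // scratch registers included. That is the point of this change.
  __xray_customevent(Msg, sizeof(Msg) - 1);
  return 0;
}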

Reviewers: pcc, pelikan, dblaikie, eizan, kpw, echristo, chandlerc

Reviewed By: echristo

Subscribers: chandlerc, echristo, hiraditya, davide, dblaikie, llvm-commits

Differential Revision: https://reviews.llvm.org/D40894

llvm-svn: 323940
Author: Dean Michael Berris
Date:   2018-02-01 02:21:54 +00:00
Commit: cdca0730be (parent 32b615c2a1)

5 changed files with 71 additions and 43 deletions

compiler-rt/lib/xray/xray_trampoline_x86_64.S

@@ -19,47 +19,56 @@
 .macro SAVE_REGISTERS
-        subq $192, %rsp
-        CFI_DEF_CFA_OFFSET(200)
-        // At this point, the stack pointer should be aligned to an 8-byte boundary,
-        // because any call instructions that come after this will add another 8
-        // bytes and therefore align it to 16-bytes.
-        movq %rbp, 184(%rsp)
-        movupd %xmm0, 168(%rsp)
-        movupd %xmm1, 152(%rsp)
-        movupd %xmm2, 136(%rsp)
-        movupd %xmm3, 120(%rsp)
-        movupd %xmm4, 104(%rsp)
-        movupd %xmm5, 88(%rsp)
-        movupd %xmm6, 72(%rsp)
-        movupd %xmm7, 56(%rsp)
-        movq %rdi, 48(%rsp)
-        movq %rax, 40(%rsp)
-        movq %rdx, 32(%rsp)
-        movq %rsi, 24(%rsp)
-        movq %rcx, 16(%rsp)
-        movq %r8, 8(%rsp)
-        movq %r9, 0(%rsp)
+        subq $240, %rsp
+        CFI_DEF_CFA_OFFSET(248)
+        movq %rbp, 232(%rsp)
+        movupd %xmm0, 216(%rsp)
+        movupd %xmm1, 200(%rsp)
+        movupd %xmm2, 184(%rsp)
+        movupd %xmm3, 168(%rsp)
+        movupd %xmm4, 152(%rsp)
+        movupd %xmm5, 136(%rsp)
+        movupd %xmm6, 120(%rsp)
+        movupd %xmm7, 104(%rsp)
+        movq %rdi, 96(%rsp)
+        movq %rax, 88(%rsp)
+        movq %rdx, 80(%rsp)
+        movq %rsi, 72(%rsp)
+        movq %rcx, 64(%rsp)
+        movq %r8, 56(%rsp)
+        movq %r9, 48(%rsp)
+        movq %r10, 40(%rsp)
+        movq %r11, 32(%rsp)
+        movq %r12, 24(%rsp)
+        movq %r13, 16(%rsp)
+        movq %r14, 8(%rsp)
+        movq %r15, 0(%rsp)
 .endm
 .macro RESTORE_REGISTERS
-        movq 184(%rsp), %rbp
-        movupd 168(%rsp), %xmm0
-        movupd 152(%rsp), %xmm1
-        movupd 136(%rsp), %xmm2
-        movupd 120(%rsp), %xmm3
-        movupd 104(%rsp), %xmm4
-        movupd 88(%rsp), %xmm5
-        movupd 72(%rsp), %xmm6
-        movupd 56(%rsp), %xmm7
-        movq 48(%rsp), %rdi
-        movq 40(%rsp), %rax
-        movq 32(%rsp), %rdx
-        movq 24(%rsp), %rsi
-        movq 16(%rsp), %rcx
-        movq 8(%rsp), %r8
-        movq 0(%rsp), %r9
-        addq $192, %rsp
+        movq 232(%rsp), %rbp
+        movupd 216(%rsp), %xmm0
+        movupd 200(%rsp), %xmm1
+        movupd 184(%rsp), %xmm2
+        movupd 168(%rsp), %xmm3
+        movupd 152(%rsp), %xmm4
+        movupd 136(%rsp), %xmm5
+        movupd 120(%rsp), %xmm6
+        movupd 104(%rsp), %xmm7
+        movq 96(%rsp), %rdi
+        movq 88(%rsp), %rax
+        movq 80(%rsp), %rdx
+        movq 72(%rsp), %rsi
+        movq 64(%rsp), %rcx
+        movq 56(%rsp), %r8
+        movq 48(%rsp), %r9
+        movq 40(%rsp), %r10
+        movq 32(%rsp), %r11
+        movq 24(%rsp), %r12
+        movq 16(%rsp), %r13
+        movq 8(%rsp), %r14
+        movq 0(%rsp), %r15
+        addq $240, %rsp
         CFI_DEF_CFA_OFFSET(8)
 .endm
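
The frame math behind the new constants: 8 XMM registers at 16 bytes each
(128 bytes) plus 14 GPRs at 8 bytes each (112 bytes) account for the 240-byte
allocation, up from 192 bytes when only 8 GPRs (%rbp, %rdi, %rax, %rdx, %rsi,
%rcx, %r8, %r9) were saved. The CFA offset is the frame size plus the 8-byte
return address: 240 + 8 = 248, previously 192 + 8 = 200. The six new slots at
the bottom of the frame hold %r10-%r15; %r10 and %r11 are the caller-clobbered
scratch registers the summary calls out.
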
@@ -100,7 +109,7 @@ ASM_SYMBOL(__xray_FunctionEntry):
         testq %rax, %rax
         je .Ltmp0
-        // The patched function prolog puts its xray_instr_map index into %r10d.
+        // The patched function prologue puts its xray_instr_map index into %r10d.
         movl %r10d, %edi
         xor %esi,%esi
         ALIGNED_CALL_RAX
@@ -220,8 +229,7 @@ ASM_SYMBOL(__xray_CustomEvent):
         SAVE_REGISTERS
         // We take two arguments to this trampoline, which should be in rdi and rsi
-        // already. We also make sure that we stash %rax because we use that register
-        // to call the logging handler.
+        // already.
         movq ASM_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax
         testq %rax,%rax
         je .LcustomEventCleanup
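
On the comment change above: %rax still carries the XRayPatchedCustomEvent
handler pointer for the indirect call, but the removed remark is now
redundant, since %rax is preserved by SAVE_REGISTERS/RESTORE_REGISTERS like
every other register rather than being stashed specially here.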

llvm/include/llvm/CodeGen/TargetLowering.h

@@ -2532,6 +2532,11 @@ protected:
   /// sequence of memory operands that is recognized by PrologEpilogInserter.
   MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
+
+  /// Replace/modify the XRay custom event operands with target-dependent
+  /// details.
+  MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
+                                         MachineBasicBlock *MBB) const;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG

llvm/lib/CodeGen/SelectionDAG/FastISel.cpp

@@ -874,6 +874,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
                     TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
   for (auto &MO : Ops)
     MIB.add(MO);
+  // Insert the Patchable Event Call instruction, that gets lowered properly.
   return true;
 }

llvm/lib/CodeGen/TargetLoweringBase.cpp

@@ -985,6 +985,21 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
   return MBB;
 }
 
+MachineBasicBlock *
+TargetLoweringBase::emitXRayCustomEvent(MachineInstr &MI,
+                                        MachineBasicBlock *MBB) const {
+  assert(MI.getOpcode() == TargetOpcode::PATCHABLE_EVENT_CALL &&
+         "Called emitXRayCustomEvent on the wrong MI!");
+  auto &MF = *MI.getMF();
+  auto MIB = BuildMI(MF, MI.getDebugLoc(), MI.getDesc());
+  for (unsigned OpIdx = 0; OpIdx != MI.getNumOperands(); ++OpIdx)
+    MIB.add(MI.getOperand(OpIdx));
+
+  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
+  MI.eraseFromParent();
+  return MBB;
+}
+
 /// findRepresentativeClass - Return the largest legal super-reg register class
 /// of the register class for the specified type and its associated "cost".
 // This function is in TargetLowering because it uses RegClassForVT which would
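
Worth noting about the helper above: it does not expand the pseudo. It builds
a fresh PATCHABLE_EVENT_CALL with the same descriptor, debug location, and
operands, inserts it where the original stood, and erases the original, so the
actual sled emission still happens later in the XRay machinery. Per the doc
comment in TargetLowering.h, the hook exists so a target can rewrite the event
operands with target-dependent details at custom-inserter time.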

llvm/lib/Target/X86/X86ISelLowering.cpp

@@ -27878,8 +27878,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     return emitPatchPoint(MI, BB);
 
   case TargetOpcode::PATCHABLE_EVENT_CALL:
-    // Do nothing here, handle in xray instrumentation pass.
-    return BB;
+    return emitXRayCustomEvent(MI, BB);
 
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();