From c154376f639b3364b506fb24cedd5641cac34b9b Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Wed, 4 Apr 2018 21:55:44 +0000 Subject: [PATCH] AArch64: Implement support for the shadowcallstack attribute. The implementation of shadow call stack on aarch64 is quite different to the implementation on x86_64. Instead of reserving a segment register for the shadow call stack, we reserve the platform register, x18. Any function that spills lr to sp also spills it to the shadow call stack, a pointer to which is stored in x18. Differential Revision: https://reviews.llvm.org/D45239 llvm-svn: 329236 --- include/llvm/Support/TargetParser.h | 2 + lib/Support/TargetParser.cpp | 4 + .../AArch64/AArch64CallingConvention.td | 15 ++++ lib/Target/AArch64/AArch64FrameLowering.cpp | 76 +++++++++++++++++-- lib/Target/AArch64/AArch64RegisterInfo.cpp | 16 ++-- lib/Target/AArch64/AArch64Subtarget.cpp | 5 +- test/CodeGen/AArch64/shadow-call-stack.ll | 47 ++++++++++++ 7 files changed, 152 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/AArch64/shadow-call-stack.ll diff --git a/include/llvm/Support/TargetParser.h b/include/llvm/Support/TargetParser.h index 8fba995948e..74ec17445ca 100644 --- a/include/llvm/Support/TargetParser.h +++ b/include/llvm/Support/TargetParser.h @@ -212,6 +212,8 @@ ARM::EndianKind parseArchEndian(StringRef Arch); ARM::ProfileKind parseArchProfile(StringRef Arch); unsigned parseArchVersion(StringRef Arch); +bool isX18ReservedByDefault(const Triple &TT); + } // namespace AArch64 namespace X86 { diff --git a/lib/Support/TargetParser.cpp b/lib/Support/TargetParser.cpp index e38121d6445..cadb3efc063 100644 --- a/lib/Support/TargetParser.cpp +++ b/lib/Support/TargetParser.cpp @@ -917,3 +917,7 @@ ARM::ProfileKind AArch64::parseArchProfile(StringRef Arch) { unsigned llvm::AArch64::parseArchVersion(StringRef Arch) { return ARM::parseArchVersion(Arch); } + +bool llvm::AArch64::isX18ReservedByDefault(const Triple &TT) { + return TT.isOSDarwin() || TT.isOSFuchsia() || TT.isOSWindows(); +} diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index e5767313289..30492003df1 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -349,3 +349,18 @@ def CSR_AArch64_StackProbe_Windows : CalleeSavedRegs<(add (sequence "X%u", 0, 15), (sequence "X%u", 18, 28), FP, SP, (sequence "Q%u", 0, 31))>; + +// Variants of the standard calling conventions for shadow call stack. +// These all preserve x18 in addition to any other registers. +def CSR_AArch64_NoRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>; +def CSR_AArch64_AllRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>; +def CSR_AArch64_CXX_TLS_Darwin_SCS + : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>; +def CSR_AArch64_AAPCS_SwiftError_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>; +def CSR_AArch64_RT_MostRegs_SCS + : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>; +def CSR_AArch64_AAPCS_SCS + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index 22a10511d37..48c0916c74c 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -414,6 +414,14 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) { + // Ignore instructions that do not operate on SP, i.e. shadow call stack + // instructions. + while (MBBI->getOpcode() == AArch64::STRXpost || + MBBI->getOpcode() == AArch64::LDRXpre) { + assert(MBBI->getOperand(0).getReg() != AArch64::SP); + ++MBBI; + } + unsigned NewOpc; bool NewIsUnscaled = false; switch (MBBI->getOpcode()) { @@ -481,6 +489,14 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI, unsigned LocalStackSize) { unsigned Opc = MI.getOpcode(); + + // Ignore instructions that do not operate on SP, i.e. shadow call stack + // instructions. + if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) { + assert(MI.getOperand(0).getReg() != AArch64::SP); + return; + } + (void)Opc; assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi || Opc == AArch64::STRXui || Opc == AArch64::STRDui || @@ -935,6 +951,18 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // assumes the SP is at the same location as it was after the callee-save save // code in the prologue. if (AfterCSRPopSize) { + // Find an insertion point for the first ldp so that it goes before the + // shadow call stack epilog instruction. This ensures that the restore of + // lr from x18 is placed after the restore from sp. + auto FirstSPPopI = MBB.getFirstTerminator(); + while (FirstSPPopI != Begin) { + auto Prev = std::prev(FirstSPPopI); + if (Prev->getOpcode() != AArch64::LDRXpre || + Prev->getOperand(0).getReg() == AArch64::SP) + break; + FirstSPPopI = Prev; + } + // Sometimes (when we restore in the same order as we save), we can end up // with code like this: // @@ -949,7 +977,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // a post-index ldp. // If we managed to grab the first pop instruction, move it to the end. if (LastPopI != Begin) - MBB.splice(MBB.getFirstTerminator(), &MBB, LastPopI); + MBB.splice(FirstSPPopI, &MBB, LastPopI); // We should end up with something like this now: // // ldp x24, x23, [sp, #16] @@ -962,7 +990,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, // // ldp x26, x25, [sp], #64 // - emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP, + emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP, AfterCSRPopSize, TII, MachineInstr::FrameDestroy); } } @@ -1081,7 +1109,8 @@ struct RegPairInfo { static void computeCalleeSaveRegisterPairs( MachineFunction &MF, const std::vector &CSI, - const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs) { + const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs, + bool &NeedShadowCallStackProlog) { if (CSI.empty()) return; @@ -1115,6 +1144,15 @@ static void computeCalleeSaveRegisterPairs( RPI.Reg2 = NextReg; } + // If either of the registers to be saved is the lr register, it means that + // we also need to save lr in the shadow call stack. + if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) && + MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) { + if (!MF.getSubtarget().isX18Reserved()) + report_fatal_error("Must reserve x18 to use shadow call stack"); + NeedShadowCallStackProlog = true; + } + // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store // pair instructions directly. Assert if we see anything otherwise. @@ -1165,9 +1203,24 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( DebugLoc DL; SmallVector RegPairs; - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + bool NeedShadowCallStackProlog = false; + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, + NeedShadowCallStackProlog); const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (NeedShadowCallStackProlog) { + // Shadow call stack prolog: str x30, [x18], #8 + BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost)) + .addReg(AArch64::X18, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::X18) + .addImm(8) + .setMIFlag(MachineInstr::FrameSetup); + + // This instruction also makes x18 live-in to the entry block. + MBB.addLiveIn(AArch64::X18); + } + for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; @@ -1231,7 +1284,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( if (MI != MBB.end()) DL = MI->getDebugLoc(); - computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); + bool NeedShadowCallStackProlog = false; + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs, + NeedShadowCallStackProlog); auto EmitMI = [&](const RegPairInfo &RPI) { unsigned Reg1 = RPI.Reg1; @@ -1280,6 +1335,17 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters( else for (const RegPairInfo &RPI : RegPairs) EmitMI(RPI); + + if (NeedShadowCallStackProlog) { + // Shadow call stack epilog: ldr x30, [x18, #-8]! + BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre)) + .addReg(AArch64::X18, RegState::Define) + .addReg(AArch64::LR, RegState::Define) + .addReg(AArch64::X18) + .addImm(-8) + .setMIFlag(MachineInstr::FrameDestroy); + } + return true; } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 933cd819706..84f1500609a 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -75,21 +75,25 @@ const MCPhysReg *AArch64RegisterInfo::getCalleeSavedRegsViaCopy( const uint32_t * AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, CallingConv::ID CC) const { + bool SCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); if (CC == CallingConv::GHC) // This is academic because all GHC calls are (supposed to be) tail calls - return CSR_AArch64_NoRegs_RegMask; + return SCS ? CSR_AArch64_NoRegs_SCS_RegMask : CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) - return CSR_AArch64_AllRegs_RegMask; + return SCS ? CSR_AArch64_AllRegs_SCS_RegMask : CSR_AArch64_AllRegs_RegMask; if (CC == CallingConv::CXX_FAST_TLS) - return CSR_AArch64_CXX_TLS_Darwin_RegMask; + return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask + : CSR_AArch64_CXX_TLS_Darwin_RegMask; if (MF.getSubtarget().getTargetLowering() ->supportSwiftError() && MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError)) - return CSR_AArch64_AAPCS_SwiftError_RegMask; + return SCS ? CSR_AArch64_AAPCS_SwiftError_SCS_RegMask + : CSR_AArch64_AAPCS_SwiftError_RegMask; if (CC == CallingConv::PreserveMost) - return CSR_AArch64_RT_MostRegs_RegMask; + return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask + : CSR_AArch64_RT_MostRegs_RegMask; else - return CSR_AArch64_AAPCS_RegMask; + return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask; } const uint32_t *AArch64RegisterInfo::getTLSCallPreservedMask() const { diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index 5f3a265d044..04bb90d30d6 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/Support/TargetParser.h" using namespace llvm; @@ -151,8 +152,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian) : AArch64GenSubtargetInfo(TT, CPU, FS), - ReserveX18(TT.isOSDarwin() || TT.isOSFuchsia() || TT.isOSWindows()), - IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), + ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian), + TargetTriple(TT), FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), TLInfo(TM, *this) { CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); diff --git a/test/CodeGen/AArch64/shadow-call-stack.ll b/test/CodeGen/AArch64/shadow-call-stack.ll new file mode 100644 index 00000000000..dbd44fd3cd1 --- /dev/null +++ b/test/CodeGen/AArch64/shadow-call-stack.ll @@ -0,0 +1,47 @@ +; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu -mattr=+reserve-x18 | FileCheck %s + +define void @f1() shadowcallstack { + ; CHECK: f1: + ; CHECK-NOT: x18 + ; CHECK: ret + ret void +} + +declare void @foo() + +define void @f2() shadowcallstack { + ; CHECK: f2: + ; CHECK-NOT: x18 + ; CHECK: b foo + tail call void @foo() + ret void +} + +declare i32 @bar() + +define i32 @f3() shadowcallstack { + ; CHECK: f3: + ; CHECK: str x30, [x18], #8 + ; CHECK: str x30, [sp, #-16]! + %res = call i32 @bar() + %res1 = add i32 %res, 1 + ; CHECK: ldr x30, [sp], #16 + ; CHECK: ldr x30, [x18, #-8]! + ; CHECK: ret + ret i32 %res +} + +define i32 @f4() shadowcallstack { + ; CHECK: f4: + %res1 = call i32 @bar() + %res2 = call i32 @bar() + %res3 = call i32 @bar() + %res4 = call i32 @bar() + %res12 = add i32 %res1, %res2 + %res34 = add i32 %res3, %res4 + %res1234 = add i32 %res12, %res34 + ; CHECK: ldp {{.*}}x30, [sp + ; CHECK: ldr x30, [x18, #-8]! + ; CHECK: ret + ret i32 %res1234 +}