diff --git a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp index 9b598d76561..8a6253d7681 100644 --- a/lib/Target/ARM64/ARM64TargetTransformInfo.cpp +++ b/lib/Target/ARM64/ARM64TargetTransformInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Target/CostTable.h" #include "llvm/Target/TargetLowering.h" +#include <algorithm> using namespace llvm; // Declare the pass initialization routine locally as target-specific passes @@ -71,8 +72,12 @@ public: /// \name Scalar TTI Implementations /// @{ - + unsigned getIntImmCost(int64_t Val) const; unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; + unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, + Type *Ty) const override; + unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, + Type *Ty) const override; PopcntSupportKind getPopcntSupport(unsigned TyWidth) const override; /// @} @@ -128,26 +133,139 @@ llvm::createARM64TargetTransformInfoPass(const ARM64TargetMachine *TM) { return new ARM64TTI(TM); } +/// \brief Calculate the cost of materializing a 64-bit value. This helper +/// method might only calculate a fraction of a larger immediate. Therefore it +/// is valid to return a cost of ZERO. +unsigned ARM64TTI::getIntImmCost(int64_t Val) const { + // Check if the immediate can be encoded within an instruction. + if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, 64)) + return 0; + + if (Val < 0) + Val = ~Val; + + // Calculate how many moves we will need to materialize this constant. + unsigned LZ = countLeadingZeros((uint64_t)Val); + return (64 - LZ + 15) / 16; +} + +/// \brief Calculate the cost of materializing the given constant. 
unsigned ARM64TTI::getIntImmCost(const APInt &Imm, Type *Ty) const { assert(Ty->isIntegerTy()); unsigned BitSize = Ty->getPrimitiveSizeInBits(); - if (BitSize == 0) + if (BitSize == 0 || BitSize > 128) return ~0U; - int64_t Val = Imm.getSExtValue(); - if (Val == 0 || ARM64_AM::isLogicalImmediate(Val, BitSize)) - return 1; + // Sign-extend all constants to a multiple of 64-bit. + APInt ImmVal = Imm; + if (BitSize & 0x3f) + ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); - if ((int64_t)Val < 0) - Val = ~Val; - if (BitSize == 32) - Val &= (1LL << 32) - 1; + // Split the constant into 64-bit chunks and calculate the cost for each + // chunk. + unsigned Cost = 0; + for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { + APInt Tmp = ImmVal.ashr(ShiftVal).getLoBits(64); + int64_t Val = Tmp.getSExtValue(); + Cost += getIntImmCost(Val); + } + // We need at least one instruction to materialize the constant. + return std::max(1U, Cost); +} - unsigned LZ = countLeadingZeros((uint64_t)Val); - unsigned Shift = (63 - LZ) / 16; - // MOVZ is free so return true for one or fewer MOVK. - return (Shift == 0) ? 1 : Shift; +unsigned ARM64TTI::getIntImmCost(unsigned Opcode, unsigned Idx, + const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0 || BitSize > 128) + return ~0U; + + unsigned ImmIdx = ~0U; + switch (Opcode) { + default: + return TCC_Free; + case Instruction::GetElementPtr: + // Always hoist the base address of a GetElementPtr. 
+ if (Idx == 0) + return 2 * TCC_Basic; + return TCC_Free; + case Instruction::Store: + ImmIdx = 0; + break; + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::ICmp: + ImmIdx = 1; + break; + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::IntToPtr: + case Instruction::PtrToInt: + case Instruction::BitCast: + case Instruction::PHI: + case Instruction::Call: + case Instruction::Select: + case Instruction::Ret: + case Instruction::Load: + break; + } + + if (Idx == ImmIdx) { + unsigned NumConstants = (BitSize + 63) / 64; + unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost; + } + return ARM64TTI::getIntImmCost(Imm, Ty); +} + +unsigned ARM64TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, + const APInt &Imm, Type *Ty) const { + assert(Ty->isIntegerTy()); + + unsigned BitSize = Ty->getPrimitiveSizeInBits(); + if (BitSize == 0 || BitSize > 128) + return ~0U; + + switch (IID) { + default: + return TCC_Free; + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + if (Idx == 1) { + unsigned NumConstants = (BitSize + 63) / 64; + unsigned Cost = ARM64TTI::getIntImmCost(Imm, Ty); + return (Cost <= NumConstants * TCC_Basic) ? 
TCC_Free : Cost; + } + break; + case Intrinsic::experimental_stackmap: + if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) + return TCC_Free; + break; + } + return ARM64TTI::getIntImmCost(Imm, Ty); } ARM64TTI::PopcntSupportKind ARM64TTI::getPopcntSupport(unsigned TyWidth) const { diff --git a/test/CodeGen/ARM64/const-addr.ll b/test/CodeGen/ARM64/const-addr.ll new file mode 100644 index 00000000000..c77a6db8fe5 --- /dev/null +++ b/test/CodeGen/ARM64/const-addr.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=arm64-darwin-unknown < %s | FileCheck %s + +%T = type { i32, i32, i32, i32 } + +; Test if the constant base address gets only materialized once. +define i32 @test1() nounwind { +; CHECK-LABEL: test1 +; CHECK: movz x8, #1039, lsl #16 +; CHECK-NEXT: movk x8, #49152 +; CHECK-NEXT: ldp w9, w10, [x8, #4] +; CHECK: ldr w8, [x8, #12] + %at = inttoptr i64 68141056 to %T* + %o1 = getelementptr %T* %at, i32 0, i32 1 + %t1 = load i32* %o1 + %o2 = getelementptr %T* %at, i32 0, i32 2 + %t2 = load i32* %o2 + %a1 = add i32 %t1, %t2 + %o3 = getelementptr %T* %at, i32 0, i32 3 + %t3 = load i32* %o3 + %a2 = add i32 %a1, %t3 + ret i32 %a2 +} + diff --git a/test/Transforms/ConstantHoisting/ARM64/const-addr.ll b/test/Transforms/ConstantHoisting/ARM64/const-addr.ll new file mode 100644 index 00000000000..89d596055c4 --- /dev/null +++ b/test/Transforms/ConstantHoisting/ARM64/const-addr.ll @@ -0,0 +1,23 @@ +; RUN: opt -mtriple=arm64-darwin-unknown -S -consthoist < %s | FileCheck %s + +%T = type { i32, i32, i32, i32 } + +define i32 @test1() nounwind { +; CHECK-LABEL: test1 +; CHECK: %const = bitcast i64 68141056 to i64 +; CHECK: %1 = inttoptr i64 %const to %T* +; CHECK: %o1 = getelementptr %T* %1, i32 0, i32 1 +; CHECK: %o2 = getelementptr %T* %1, i32 0, i32 2 +; CHECK: 
%o3 = getelementptr %T* %1, i32 0, i32 3 + %at = inttoptr i64 68141056 to %T* + %o1 = getelementptr %T* %at, i32 0, i32 1 + %t1 = load i32* %o1 + %o2 = getelementptr %T* %at, i32 0, i32 2 + %t2 = load i32* %o2 + %a1 = add i32 %t1, %t2 + %o3 = getelementptr %T* %at, i32 0, i32 3 + %t3 = load i32* %o3 + %a2 = add i32 %a1, %t3 + ret i32 %a2 +} + diff --git a/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg b/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg new file mode 100644 index 00000000000..84ac9811f01 --- /dev/null +++ b/test/Transforms/ConstantHoisting/ARM64/lit.local.cfg @@ -0,0 +1,3 @@ +targets = set(config.root.targets_to_build.split()) +if not 'ARM64' in targets: + config.unsupported = True