[Constant Hoisting] Make the constant materialization cost operand dependent

Extend the target hook to also take the operand index into account when calculating the cost of constant materialization. Related to <rdar://problem/16381500>. llvm-svn: 204435
2025-01-09 01:29:52 +00:00 · 2014-03-21 06:04:45 +00:00 · 2014-03-21 06:04:45 +00:00 · f0dff49ad0
commit f0dff49ad0
parent 500abd48d1
7 changed files with 66 additions and 54 deletions
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@ -297,10 +297,10 @@ public:
  /// \brief Return the expected cost of materialization for the given integer
  /// immediate of the specified type for a given instruction. The cost can be
  /// zero if the immediate can be folded into the specified instruction.
-  virtual unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
-                                 Type *Ty) const;
-  virtual unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                 Type *Ty) const;
+  virtual unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                 const APInt &Imm, Type *Ty) const;
  /// @}

  /// \name Vector Target Information
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@ -148,14 +148,14 @@ unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  return PrevTTI->getIntImmCost(Imm, Ty);
 }

-unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, const APInt &Imm,
-                                            Type *Ty) const {
-  return PrevTTI->getIntImmCost(Opcode, Imm, Ty);
+unsigned TargetTransformInfo::getIntImmCost(unsigned Opc, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(Opc, Idx, Imm, Ty);
 }

-unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
-                                            Type *Ty) const {
-  return PrevTTI->getIntImmCost(IID, Imm, Ty);
+unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                                            const APInt &Imm, Type *Ty) const {
+  return PrevTTI->getIntImmCost(IID, Idx, Imm, Ty);
 }

 unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
@ -539,12 +539,12 @@ struct NoTTI final : ImmutablePass, TargetTransformInfo {
    return TCC_Basic;
  }

-  unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override {
    return TCC_Free;
  }

-  unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override {
    return TCC_Free;
  }
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@ -103,9 +103,9 @@ public:

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override;

-  unsigned getIntImmCost(unsigned Opcode, const APInt &Imm,
+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;
-  unsigned getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) const override;

  /// @}
@ -776,6 +776,9 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
  if (BitSize == 0)
    return ~0U;

+  if (Imm == 0)
+    return TCC_Free;
+
  if (Imm.getBitWidth() <= 64 &&
      (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
    return TCC_Basic;
@ -783,7 +786,7 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
    return 2 * TCC_Basic;
 }

-unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
+unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                               Type *Ty) const {
  assert(Ty->isIntegerTy());

@ -791,7 +794,15 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
  if (BitSize == 0)
    return ~0U;

+  unsigned ImmIdx = ~0U;
  switch (Opcode) {
+  default: return TCC_Free;
+  case Instruction::GetElementPtr:
+    if (Idx != 0)
+      return TCC_Free;
+  case Instruction::Store:
+    ImmIdx = 0;
+    break;
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
@ -806,28 +817,31 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, const APInt &Imm,
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::ICmp:
-    if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
-      return TCC_Free;
-    else
-      return X86TTI::getIntImmCost(Imm, Ty);
+    ImmIdx = 1;
+    break;
  case Instruction::Trunc:
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::IntToPtr:
  case Instruction::PtrToInt:
  case Instruction::BitCast:
+  case Instruction::PHI:
  case Instruction::Call:
  case Instruction::Select:
  case Instruction::Ret:
  case Instruction::Load:
-  case Instruction::Store:
-    return X86TTI::getIntImmCost(Imm, Ty);
+    break;
  }
-  return TargetTransformInfo::getIntImmCost(Opcode, Imm, Ty);
+
+  if ((Idx == ImmIdx) &&
+      Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+    return TCC_Free;
+
+  return X86TTI::getIntImmCost(Imm, Ty);
 }

-unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
-                               Type *Ty) const {
+unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
+                               const APInt &Imm, Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
@ -835,21 +849,24 @@ unsigned X86TTI::getIntImmCost(Intrinsic::ID IID, const APInt &Imm,
    return ~0U;

  switch (IID) {
-  default: return TargetTransformInfo::getIntImmCost(IID, Imm, Ty);
+  default: return TCC_Free;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow:
  case Intrinsic::ssub_with_overflow:
  case Intrinsic::usub_with_overflow:
  case Intrinsic::smul_with_overflow:
  case Intrinsic::umul_with_overflow:
-    if (Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
+    if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
      return TCC_Free;
    else
      return X86TTI::getIntImmCost(Imm, Ty);
  case Intrinsic::experimental_stackmap:
+    if (Idx < 2)
+      return TCC_Free;
  case Intrinsic::experimental_patchpoint_void:
  case Intrinsic::experimental_patchpoint_i64:
-    if (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))
+    if ((Idx < 4 ) ||
+        (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
      return TCC_Free;
    else
      return X86TTI::getIntImmCost(Imm, Ty);
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@ -29,7 +29,7 @@
 // certain transformations on them, which would create a new expensive constant.
 //
 // This optimization is only applied to integer constants in instructions and
-// simple (this means not nested) constant cast experessions. For example:
+// simple (this means not nested) constant cast expressions. For example:
 // %0 = load i64* inttoptr (i64 big_constant to i64*)
 //===----------------------------------------------------------------------===//

@ -66,7 +66,7 @@ struct ConstantUser {
  ConstantUser(Instruction *Inst, unsigned Idx) : Inst(Inst), OpndIdx(Idx) { }
 };

-/// \brief Keeps track of a constant candidate and its usees.
+/// \brief Keeps track of a constant candidate and its uses.
 struct ConstantCandidate {
  ConstantUseListType Uses;
  ConstantInt *ConstInt;
@ -292,7 +292,7 @@ findConstantInsertionPoint(const ConstantInfo &ConstInfo) const {
 /// \brief Record constant integer ConstInt for instruction Inst at operand
 /// index Idx.
 ///
-/// The operand at index Idx is not necessarily the constant inetger itself. It
+/// The operand at index Idx is not necessarily the constant integer itself. It
 /// could also be a cast instruction or a constant expression that uses the
 // constant integer.
 void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
@ -300,12 +300,12 @@ void ConstantHoisting::collectConstantCandidates(Instruction *Inst,
                                                 ConstantInt *ConstInt) {
  unsigned Cost;
  // Ask the target about the cost of materializing the constant for the given
-  // instruction.
+  // instruction and operand index.
  if (auto IntrInst = dyn_cast<IntrinsicInst>(Inst))
-    Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(),
+    Cost = TTI->getIntImmCost(IntrInst->getIntrinsicID(), Idx,
                              ConstInt->getValue(), ConstInt->getType());
  else
-    Cost = TTI->getIntImmCost(Inst->getOpcode(), ConstInt->getValue(),
+    Cost = TTI->getIntImmCost(Inst->getOpcode(), Idx, ConstInt->getValue(),
                              ConstInt->getType());

  // Ignore cheap integer constants.
@ -582,7 +582,7 @@ bool ConstantHoisting::optimizeConstants(Function &Fn) {
  if (ConstantVec.empty())
    return false;

-  // Finally hoist the base constant and emit materializating code for dependent
+  // Finally hoist the base constant and emit materialization code for dependent
  // constants.
  bool MadeChange = emitBaseConstants();

--- a/llvm/test/CodeGen/X86/lsr-interesting-step.ll
+++ b/llvm/test/CodeGen/X86/lsr-interesting-step.ll
@ -3,26 +3,24 @@
 ; The inner loop should require only one add (and no leas either).
 ; rdar://8100380

-; CHECK:      BB0_3:
-; CHECK-NEXT:   movb    $0, flags(%rdx)
-; CHECK-NEXT:   addq    %rax, %rdx
-; CHECK-NEXT:   cmpq    $8192, %rdx
+; CHECK:      BB0_2:
+; CHECK-NEXT:   movb    $0, flags(%rcx)
+; CHECK-NEXT:   addq    %rax, %rcx
+; CHECK-NEXT:   cmpq    $8192, %rcx
 ; CHECK-NEXT:   jl

@flags = external global [8192 x i8], align 16 ; <[8192 x i8]*> [#uses=1]

 define void @foo() nounwind {
 entry:
-  %tmp = icmp slt i64 2, 8192                     ; <i1> [#uses=1]
-  br i1 %tmp, label %bb, label %bb21
+  br label %bb

 bb:                                               ; preds = %entry
  br label %bb7

 bb7:                                              ; preds = %bb, %bb17
  %tmp8 = phi i64 [ %tmp18, %bb17 ], [ 2, %bb ]   ; <i64> [#uses=2]
-  %tmp9 = icmp slt i64 2, 8192                    ; <i1> [#uses=1]
-  br i1 %tmp9, label %bb10, label %bb17
+  br label %bb10

 bb10:                                             ; preds = %bb7
  br label %bb11
--- a/llvm/test/CodeGen/X86/negate-add-zero.ll
+++ b/llvm/test/CodeGen/X86/negate-add-zero.ll
@ -827,9 +827,7 @@ declare void @_ZN11MatrixTools9transposeI11FixedMatrixIdLi6ELi6ELi0ELi0EEEENT_13
 declare void @_ZN21HNodeTranslateRotate311toCartesianEv(%struct.HNodeTranslateRotate3*)

 define linkonce void @_ZN21HNodeTranslateRotate36setVelERK9CDSVectorIdLi1EN3CDS12DefaultAllocEE(%struct.HNodeTranslateRotate3* %this, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv) {
-entry:
-	%0 = add i32 0, -1		; <i32> [#uses=1]
-	%1 = getelementptr double* null, i32 %0		; <double*> [#uses=1]
+	%1 = getelementptr double* null, i32 -1		; <double*> [#uses=1]
 	%2 = load double* %1, align 8		; <double> [#uses=1]
 	%3 = load double* null, align 8		; <double> [#uses=2]
 	%4 = load double* null, align 8		; <double> [#uses=2]
@ -890,13 +888,12 @@ entry:
 	store double %52, double* %55, align 8
 	%56 = getelementptr %struct.HNodeTranslateRotate3* %this, i32 0, i32 0, i32 10, i32 0, i32 0, i32 2		; <double*> [#uses=1]
 	store double %53, double* %56, align 8
-	%57 = add i32 0, 4		; <i32> [#uses=1]
-	%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0		; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
-	store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %58, align 8
-	%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1		; <i32*> [#uses=1]
-	store i32 %57, i32* %59, align 4
-	%60 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2		; <i32*> [#uses=1]
-	store i32 3, i32* %60, align 8
+	%57 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 0		; <%"struct.CDSVector<double,0,CDS::DefaultAlloc>"**> [#uses=1]
+	store %"struct.CDSVector<double,0,CDS::DefaultAlloc>"* %velv, %"struct.CDSVector<double,0,CDS::DefaultAlloc>"** %57, align 8
+	%58 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 1		; <i32*> [#uses=1]
+	store i32 4, i32* %58, align 4
+	%59 = getelementptr %"struct.SubVector<CDSVector<double, 1, CDS::DefaultAlloc> >"* null, i32 0, i32 2		; <i32*> [#uses=1]
+	store i32 3, i32* %59, align 8
 	unreachable
 }

--- a/llvm/test/Transforms/ConstantHoisting/X86/phi.ll
+++ b/llvm/test/Transforms/ConstantHoisting/X86/phi.ll
@ -20,10 +20,10 @@ return:

 ; CHECK-LABEL: @test1
 ; CHECK: if.end:
-; CHECK: %const_mat = add i64 %const, 1
-; CHECK-NEXT: %1 = inttoptr i64 %const_mat to i8*
+; CHECK: %2 = inttoptr i64 %const to i8*
+; CHECK-NEXT: br
 ; CHECK: return:
-; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ inttoptr (i64 68719476736 to i8*), %if.end ]
+; CHECK-NEXT: %retval.0 = phi i8* [ null, %entry ], [ %2, %if.end ]
 }

 define void @test2(i1 %cmp, i64** %tmp) {