Loop Vectorizer: Update the cost model of scatter/gather operations and make
them more expensive.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170995 91177308-0d34-0410-b5e6-96231b3b80d8
2025-01-26 14:25:18 +00:00 · 2012-12-23 07:23:55 +00:00 · 2012-12-23 07:23:55 +00:00 · d54fed2786
commit d54fed2786
parent c4265e1d68
6 changed files with 40 additions and 46 deletions
--- a/include/llvm/Target/TargetTransformImpl.h
+++ b/include/llvm/Target/TargetTransformImpl.h
@@ -69,8 +69,6 @@ public:

  virtual ~VectorTargetTransformImpl() {}

-  virtual unsigned getInstrCost(unsigned Opcode, Type *Ty1, Type *Ty2) const;
-
  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;

  virtual unsigned getBroadcastCost(Type *Tp) const;
--- a/include/llvm/TargetTransformInfo.h
+++ b/include/llvm/TargetTransformInfo.h
@@ -135,44 +135,28 @@ public:
  virtual bool shouldBuildLookupTables() const {
    return true;
  }
-
  /// getPopcntHwSupport - Return hardware support for population count.
  virtual PopcntHwSupport getPopcntHwSupport(unsigned IntTyWidthInBit) const {
    return None;
  }
-
  /// getIntImmCost - Return the expected cost of materializing the given
  /// integer immediate of the specified type.
  virtual unsigned getIntImmCost(const APInt&, Type*) const {
-    // Default assumption is immediate is cheap.
+    // The default assumption is that the immediate is cheap.
    return 1;
  }
 };

 /// VectorTargetTransformInfo - This interface is used by the vectorizers
 /// to estimate the profitability of vectorization for different instructions.
+/// This interface provides the cost of different IR instructions. The cost
+/// is unit-less and represents the estimated throughput of the instruction
+/// (not the latency!) assuming that all branches are predicted, cache is hit,
+/// etc.
 class VectorTargetTransformInfo {
 public:
  virtual ~VectorTargetTransformInfo() {}

-  /// Returns the expected cost of the instruction opcode. The opcode is one of
-  /// the enums like Instruction::Add. The type arguments are the type of the
-  /// operation.
-  /// Most instructions only use the first type and in that case the second
-  /// operand is ignored.
-  ///
-  /// Exceptions:
-  /// * Br instructions do not use any of the types.
-  /// * Select instructions pass the return type as Ty1 and the selector as Ty2.
-  /// * Cast instructions pass the destination as Ty1 and the source as Ty2.
-  /// * Insert/Extract element pass only the vector type as Ty1.
-  /// * ShuffleVector, Load, Store do not use this call.
-  virtual unsigned getInstrCost(unsigned Opcode,
-                                Type *Ty1 = 0,
-                                Type *Ty2 = 0) const {
-    return 1;
-  }
-
  /// Returns the expected cost of arithmetic ops, such as mul, xor, fsub, etc.
  virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
    return 1;
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -132,7 +132,6 @@ int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {

 std::pair<unsigned, MVT>
 VectorTargetTransformImpl::getTypeLegalizationCost(Type *Ty) const {
-
  LLVMContext &C = Ty->getContext();
  EVT MTy = TLI->getValueType(Ty);

@@ -271,7 +270,7 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
  }

-  // We already handled vector-to-vector and scalar-to-scalar conversions. This 
+  // We already handled vector-to-vector and scalar-to-scalar conversions. This
  // is where we handle bitcast between vectors and scalars. We need to assume
  //  that the conversion is scalarized in one way or another.
  if (Opcode == Instruction::BitCast)
@@ -283,6 +282,7 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
 }

 unsigned VectorTargetTransformImpl::getCFInstrCost(unsigned Opcode) const {
+  // Branches are assumed to be predicted.
  return 0;
 }

@@ -330,12 +330,6 @@ unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
  return 1;
 }

-unsigned
-VectorTargetTransformImpl::getInstrCost(unsigned Opcode, Type *Ty1,
-                                        Type *Ty2) const {
-  return 1;
-}
-
 unsigned
 VectorTargetTransformImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
                                           unsigned Alignment,
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17988,7 +17988,6 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
  return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
 }

-
 unsigned
 X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                              unsigned Alignment,
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2080,17 +2080,23 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
    VectorTy = ToVectorTy(ValTy, VF);

    if (VF == 1)
-      return VTTI->getMemoryOpCost(I->getOpcode(), ValTy,
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy,
                                   SI->getAlignment(),
                                   SI->getPointerAddressSpace());

    // Scalarized stores.
    if (!Legal->isConsecutivePtr(SI->getPointerOperand())) {
      unsigned Cost = 0;
-      unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
-                                            ValTy);
-      // The cost of extracting from the value vector.
-      Cost += VF * (ExtCost);
+
+      // The cost of extracting from the value vector and pointer vector.
+      Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+      for (unsigned i = 0; i < VF; ++i) {
+        Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
+                                         VectorTy, i);
+        Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
+                                         PtrTy, i);
+      }
+
      // The cost of the scalar stores.
      Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
                                         ValTy->getScalarType(),
@@ -2107,16 +2113,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
    LoadInst *LI = cast<LoadInst>(I);

    if (VF == 1)
-      return VTTI->getMemoryOpCost(I->getOpcode(), RetTy,
+      return VTTI->getMemoryOpCost(I->getOpcode(), VectorTy,
                                   LI->getAlignment(),
                                   LI->getPointerAddressSpace());

    // Scalarized loads.
    if (!Legal->isConsecutivePtr(LI->getPointerOperand())) {
      unsigned Cost = 0;
-      unsigned InCost = VTTI->getInstrCost(Instruction::InsertElement, RetTy);
-      // The cost of inserting the loaded value into the result vector.
-      Cost += VF * (InCost);
+      Type *PtrTy = ToVectorTy(I->getOperand(0)->getType(), VF);
+
+      // The cost of extracting from the pointer vector.
+      for (unsigned i = 0; i < VF; ++i)
+        Cost += VTTI->getVectorInstrCost(Instruction::ExtractElement,
+                                         PtrTy, i);
+
+      // The cost of inserting data to the result vector.
+      for (unsigned i = 0; i < VF; ++i)
+        Cost += VTTI->getVectorInstrCost(Instruction::InsertElement,
+                                         VectorTy, i);
+
      // The cost of the scalar stores.
      Cost += VF * VTTI->getMemoryOpCost(I->getOpcode(),
                                         RetTy->getScalarType(),
@@ -2169,18 +2184,19 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
    bool IsVoid = RetTy->isVoidTy();

    unsigned InsCost = (IsVoid ? 0 :
-                        VTTI->getInstrCost(Instruction::InsertElement,
+                        VTTI->getVectorInstrCost(Instruction::InsertElement,
                                           VectorTy));

-    unsigned ExtCost = VTTI->getInstrCost(Instruction::ExtractElement,
+    unsigned ExtCost = VTTI->getVectorInstrCost(Instruction::ExtractElement,
                                          VectorTy);

    // The cost of inserting the results plus extracting each one of the
    // operands.
    Cost += VF * (InsCost + ExtCost * I->getNumOperands());

-    // The cost of executing VF copies of the scalar instruction.
-    Cost += VF * VTTI->getInstrCost(I->getOpcode(), RetTy);
+    // The cost of executing VF copies of the scalar instruction. This opcode
+    // is unknown. Assume that it is the same as 'mul'.
+    Cost += VF * VTTI->getArithmeticInstrCost(Instruction::Mul, VectorTy);
    return Cost;
  }
  }// end of switch.
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -8,8 +8,11 @@ target triple = "x86_64-apple-macosx10.8.0"
@d = common global [2048 x i32] zeroinitializer, align 16
@a = common global [2048 x i32] zeroinitializer, align 16

+; The program below gathers and scatters data. We better not vectorize it.
 ;CHECK: cost_model_1
-;CHECK: <4 x i32>
+;CHECK-NOT: <2 x i32>
+;CHECK-NOT: <4 x i32>
+;CHECK-NOT: <8 x i32>
 ;CHECK: ret void
 define void @cost_model_1() nounwind uwtable noinline ssp {
 entry: