[X86] Reduce math for index calculation for inserting and extracting subvectors and elements by exploiting the fact that all supported vector types have a power 2 number of elements.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251740 91177308-0d34-0410-b5e6-96231b3b80d8
2024-11-30 23:20:54 +00:00 · 2015-10-31 17:27:52 +00:00 · 2015-10-31 17:27:52 +00:00 · 2d7d45bb20
commit 2d7d45bb20
parent e798009f10
1 changed files with 16 additions and 14 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@ -4373,19 +4373,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,

  // Extract the relevant vectorWidth bits.  Generate an EXTRACT_SUBVECTOR
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
-                       makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
-                                    ElemsPerChunk));
+                       makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));

-  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
 }

@ -4423,13 +4422,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,

  // Insert the relevant vectorWidth bits.
  unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
+  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the vectorWidth-bit chunk
-  // we want.
-  unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
-                               * ElemsPerChunk);
+  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
+  IdxVal &= ~(ElemsPerChunk - 1);

-  SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
+  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
 }

@ -11390,10 +11389,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    MVT EltVT = VecVT.getVectorElementType();

    unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
+    assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

-    //if (IdxVal >= NumElems/2)
-    //  IdxVal -= NumElems/2;
-    IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
+    // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
+    // this can be done with a mask.
+    IdxVal &= ElemsPerChunk - 1;
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                       DAG.getConstant(IdxVal, dl, MVT::i32));
  }
@ -11529,7 +11529,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,

    // Insert the element into the desired chunk.
    unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
-    unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
+    assert(isPowerOf2_32(NumEltsIn128));
+    // Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
+    unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);

    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
                    DAG.getConstant(IdxIn128, dl, MVT::i32));