mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-30 23:20:54 +00:00
[X86] Reduce the math for index calculation when inserting and extracting subvectors and elements by exploiting the fact that all supported vector types have a power-of-2 number of elements.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@251740 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e798009f10
commit
2d7d45bb20
@ -4373,19 +4373,18 @@ static SDValue ExtractSubVector(SDValue Vec, unsigned IdxVal,
|
||||
|
||||
// Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
|
||||
unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
|
||||
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
|
||||
|
||||
// This is the index of the first element of the vectorWidth-bit chunk
|
||||
// we want.
|
||||
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits()) / vectorWidth)
|
||||
* ElemsPerChunk);
|
||||
// we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
|
||||
IdxVal &= ~(ElemsPerChunk - 1);
|
||||
|
||||
// If the input is a buildvector just emit a smaller one.
|
||||
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
|
||||
makeArrayRef(Vec->op_begin() + NormalizedIdxVal,
|
||||
ElemsPerChunk));
|
||||
makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
|
||||
|
||||
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
|
||||
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
|
||||
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
|
||||
}
|
||||
|
||||
@ -4423,13 +4422,13 @@ static SDValue InsertSubVector(SDValue Result, SDValue Vec,
|
||||
|
||||
// Insert the relevant vectorWidth bits.
|
||||
unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
|
||||
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
|
||||
|
||||
// This is the index of the first element of the vectorWidth-bit chunk
|
||||
// we want.
|
||||
unsigned NormalizedIdxVal = (((IdxVal * ElVT.getSizeInBits())/vectorWidth)
|
||||
* ElemsPerChunk);
|
||||
// we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
|
||||
IdxVal &= ~(ElemsPerChunk - 1);
|
||||
|
||||
SDValue VecIdx = DAG.getIntPtrConstant(NormalizedIdxVal, dl);
|
||||
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
|
||||
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
|
||||
}
|
||||
|
||||
@ -11390,10 +11389,11 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
|
||||
MVT EltVT = VecVT.getVectorElementType();
|
||||
|
||||
unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
|
||||
assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
|
||||
|
||||
//if (IdxVal >= NumElems/2)
|
||||
// IdxVal -= NumElems/2;
|
||||
IdxVal -= (IdxVal/ElemsPerChunk)*ElemsPerChunk;
|
||||
// Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
|
||||
// this can be done with a mask.
|
||||
IdxVal &= ElemsPerChunk - 1;
|
||||
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
|
||||
DAG.getConstant(IdxVal, dl, MVT::i32));
|
||||
}
|
||||
@ -11529,7 +11529,9 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
||||
|
||||
// Insert the element into the desired chunk.
|
||||
unsigned NumEltsIn128 = 128 / EltVT.getSizeInBits();
|
||||
unsigned IdxIn128 = IdxVal - (IdxVal / NumEltsIn128) * NumEltsIn128;
|
||||
assert(isPowerOf2_32(NumEltsIn128));
|
||||
// Since NumEltsIn128 is a power of 2 we can use mask instead of modulo.
|
||||
unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);
|
||||
|
||||
V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
|
||||
DAG.getConstant(IdxIn128, dl, MVT::i32));
|
||||
|
Loading…
Reference in New Issue
Block a user