mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-05 11:18:48 +00:00
AMDGPU: SimplifyDemandedElts for image intrinsics
Causes some VGPR usage improvements in shaderdb, but introduces some SGPR spilling regressions due to random scheduling changes later.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300453 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5c0006e064
commit
6d01b81485
@ -1641,7 +1641,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||||||
UndefElts.setHighBits(VWidth / 2);
|
UndefElts.setHighBits(VWidth / 2);
|
||||||
break;
|
break;
|
||||||
case Intrinsic::amdgcn_buffer_load:
|
case Intrinsic::amdgcn_buffer_load:
|
||||||
case Intrinsic::amdgcn_buffer_load_format: {
|
case Intrinsic::amdgcn_buffer_load_format:
|
||||||
|
case Intrinsic::amdgcn_image_sample:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_d:
|
||||||
|
case Intrinsic::amdgcn_image_sample_d_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_l:
|
||||||
|
case Intrinsic::amdgcn_image_sample_b:
|
||||||
|
case Intrinsic::amdgcn_image_sample_b_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_lz:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cd:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cd_cl:
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_image_sample_c:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_d:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_d_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_l:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_b:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_b_cl:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_lz:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cd:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cd_cl:
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_image_sample_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_d_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_d_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_l_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_b_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_b_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_lz_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cd_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_cd_cl_o:
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_d_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_d_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_l_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_b_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_b_cl_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_lz_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cd_o:
|
||||||
|
case Intrinsic::amdgcn_image_sample_c_cd_cl_o:
|
||||||
|
|
||||||
|
case Intrinsic::amdgcn_image_getlod: {
|
||||||
if (VWidth == 1 || !DemandedElts.isMask())
|
if (VWidth == 1 || !DemandedElts.isMask())
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
|
||||||
@ -1656,8 +1701,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||||||
Type *NewTy = (NewNumElts == 1) ? EltTy :
|
Type *NewTy = (NewNumElts == 1) ? EltTy :
|
||||||
VectorType::get(EltTy, NewNumElts);
|
VectorType::get(EltTy, NewNumElts);
|
||||||
|
|
||||||
Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
|
auto IID = II->getIntrinsicID();
|
||||||
NewTy);
|
|
||||||
|
bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load ||
|
||||||
|
IID == Intrinsic::amdgcn_buffer_load_format;
|
||||||
|
|
||||||
|
Function *NewIntrin = IsBuffer ?
|
||||||
|
Intrinsic::getDeclaration(M, IID, NewTy) :
|
||||||
|
// Samplers have 3 mangled types.
|
||||||
|
Intrinsic::getDeclaration(M, IID,
|
||||||
|
{ NewTy, II->getArgOperand(0)->getType(),
|
||||||
|
II->getArgOperand(1)->getType()});
|
||||||
|
|
||||||
SmallVector<Value *, 5> Args;
|
SmallVector<Value *, 5> Args;
|
||||||
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
|
for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
|
||||||
@ -1669,6 +1723,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
|
|||||||
CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
|
CallInst *NewCall = Builder->CreateCall(NewIntrin, Args);
|
||||||
NewCall->takeName(II);
|
NewCall->takeName(II);
|
||||||
NewCall->copyMetadata(*II);
|
NewCall->copyMetadata(*II);
|
||||||
|
|
||||||
|
if (!IsBuffer) {
|
||||||
|
ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3));
|
||||||
|
if (DMask) {
|
||||||
|
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
|
||||||
|
|
||||||
|
unsigned PopCnt = 0;
|
||||||
|
unsigned NewDMask = 0;
|
||||||
|
for (unsigned I = 0; I < 4; ++I) {
|
||||||
|
const unsigned Bit = 1 << I;
|
||||||
|
if (!!(DMaskVal & Bit)) {
|
||||||
|
if (++PopCnt > NewNumElts)
|
||||||
|
break;
|
||||||
|
|
||||||
|
NewDMask |= Bit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (NewNumElts == 1) {
|
if (NewNumElts == 1) {
|
||||||
return Builder->CreateInsertElement(UndefValue::get(V->getType()),
|
return Builder->CreateInsertElement(UndefValue::get(V->getType()),
|
||||||
NewCall, static_cast<uint64_t>(0));
|
NewCall, static_cast<uint64_t>(0));
|
||||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user