mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 12:46:00 +00:00
InstCombine/AMDGPU: Fix constant folding of llvm.amdgcn.{icmp,fcmp}
Summary:
The return value of these intrinsics should always have 0 bits for inactive
threads. This means that when all arguments are constant and the comparison
evaluates to true, the intrinsic should return the current exec mask.

Fixes some GL_ARB_shader_ballot tests.

Reviewers: arsenm

Subscribers: kzhuravl, wdng, yaxunl, dstuttard, tpr, llvm-commits, t-tye

Differential Revision: https://reviews.llvm.org/D32344

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@301195 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8978f2978f
commit
7b717b6e43
@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
|
||||
if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
|
||||
if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
|
||||
Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
|
||||
- return replaceInstUsesWith(*II,
|
||||
- ConstantExpr::getSExt(CCmp, II->getType()));
|
||||
+ if (CCmp->isNullValue()) {
|
||||
+ return replaceInstUsesWith(
|
||||
+ *II, ConstantExpr::getSExt(CCmp, II->getType()));
|
||||
+ }
|
||||
|
||||
+ // The result of V_ICMP/V_FCMP assembly instructions (which this
|
||||
+ // intrinsic exposes) is one bit per thread, masked with the EXEC
|
||||
+ // register (which contains the bitmask of live threads). So a
|
||||
+ // comparison that always returns true is the same as a read of the
|
||||
+ // EXEC register.
|
||||
+ Value *NewF = Intrinsic::getDeclaration(
|
||||
+ II->getModule(), Intrinsic::read_register, II->getType());
|
||||
+ Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")};
|
||||
+ MDNode *MD = MDNode::get(II->getContext(), MDArgs);
|
||||
+ Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)};
|
||||
+ CallInst *NewCall = Builder->CreateCall(NewF, Args);
|
||||
+ NewCall->addAttribute(AttributeList::FunctionIndex,
|
||||
+ Attribute::Convergent);
|
||||
+ NewCall->takeName(II);
|
||||
+ return replaceInstUsesWith(*II, NewCall);
|
||||
}
|
||||
|
||||
// Canonicalize constants to RHS.
|
||||
|
@ -1259,7 +1259,7 @@ define i64 @icmp_constant_inputs_false() {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @icmp_constant_inputs_true(
|
||||
- ; CHECK: ret i64 -1
|
||||
+ ; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
|
||||
define i64 @icmp_constant_inputs_true() {
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
|
||||
ret i64 %result
|
||||
@ -1524,7 +1524,7 @@ define i64 @fcmp_constant_inputs_false() {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @fcmp_constant_inputs_true(
|
||||
- ; CHECK: ret i64 -1
|
||||
+ ; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4
|
||||
define i64 @fcmp_constant_inputs_true() {
|
||||
%result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
|
||||
ret i64 %result
|
||||
@ -1536,3 +1536,5 @@ define i64 @fcmp_constant_to_rhs_olt(float %x) {
|
||||
%result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
+ ; CHECK: attributes #4 = { convergent }
|
||||
|
Loading…
x
Reference in New Issue
Block a user