diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index e7aa1a45737..0ea1c7be422 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3432,8 +3432,26 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (auto *CSrc0 = dyn_cast(Src0)) { if (auto *CSrc1 = dyn_cast(Src1)) { Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1); - return replaceInstUsesWith(*II, - ConstantExpr::getSExt(CCmp, II->getType())); + if (CCmp->isNullValue()) { + return replaceInstUsesWith( + *II, ConstantExpr::getSExt(CCmp, II->getType())); + } + + // The result of V_ICMP/V_FCMP assembly instructions (which this + // intrinsic exposes) is one bit per thread, masked with the EXEC + // register (which contains the bitmask of live threads). So a + // comparison that always returns true is the same as a read of the + // EXEC register. + Value *NewF = Intrinsic::getDeclaration( + II->getModule(), Intrinsic::read_register, II->getType()); + Metadata *MDArgs[] = {MDString::get(II->getContext(), "exec")}; + MDNode *MD = MDNode::get(II->getContext(), MDArgs); + Value *Args[] = {MetadataAsValue::get(II->getContext(), MD)}; + CallInst *NewCall = Builder->CreateCall(NewF, Args); + NewCall->addAttribute(AttributeList::FunctionIndex, + Attribute::Convergent); + NewCall->takeName(II); + return replaceInstUsesWith(*II, NewCall); } // Canonicalize constants to RHS. diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll index deae5502bcd..357085fd31f 100644 --- a/test/Transforms/InstCombine/amdgcn-intrinsics.ll +++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll @@ -1259,7 +1259,7 @@ define i64 @icmp_constant_inputs_false() { } ; CHECK-LABEL: @icmp_constant_inputs_true( -; CHECK: ret i64 -1 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4 define i64 @icmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34) ret i64 %result @@ -1524,7 +1524,7 @@ define i64 @fcmp_constant_inputs_false() { } ; CHECK-LABEL: @fcmp_constant_inputs_true( -; CHECK: ret i64 -1 +; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #4 define i64 @fcmp_constant_inputs_true() { %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4) ret i64 %result @@ -1536,3 +1536,5 @@ define i64 @fcmp_constant_to_rhs_olt(float %x) { %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4) ret i64 %result } + +; CHECK: attributes #4 = { convergent }