[AMDGPU] gfx1010 wave32 icmp/fcmp intrinsic changes for wave32
Differential Revision: https://reviews.llvm.org/D63301

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@363339 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: commit 6e4d9e6a89 (parent bfbea497c9)
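Before the diff, a brief illustrative sketch (not part of the commit): the wave-mask intrinsics become overloaded on the mask type in addition to the operand type, so the mangled names carry both. The condition-code operand (32 = eq, 33 = ne) is unchanged. The .i32-mangled wave32 form below is inferred from the overloading scheme; only the .i64 forms appear verbatim in the updated tests:

    ; wave64 mask (the pre-existing behavior, now explicitly mangled)
    %m64 = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)
    ; wave32 mask on a gfx1010 wave32 subtarget (assumed mangling)
    %m32 = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %a, i32 %b, i32 32)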
@@ -187,6 +187,10 @@ def int_amdgcn_init_exec_from_input : Intrinsic<[],
                           llvm_i32_ty],          // bit offset of the thread count
                          [IntrConvergent]>;
 
+def int_amdgcn_wavefrontsize :
+  GCCBuiltin<"__builtin_amdgcn_wavefrontsize">,
+  Intrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
 //===----------------------------------------------------------------------===//
 // Instruction Intrinsics
@@ -1302,11 +1306,11 @@ def int_amdgcn_cvt_pk_u8_f32 :
 >;
 
 def int_amdgcn_icmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
+  Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, LLVMMatchType<1>, llvm_i32_ty],
             [IntrNoMem, IntrConvergent, ImmArg<2>]>;
 
 def int_amdgcn_fcmp :
-  Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
+  Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty, LLVMMatchType<1>, llvm_i32_ty],
             [IntrNoMem, IntrConvergent, ImmArg<2>]>;
 
 def int_amdgcn_readfirstlane :
@@ -1576,23 +1580,23 @@ def int_amdgcn_udot8 :
 // Special Intrinsics for backend internal use only. No frontend
 // should emit calls to these.
 // ===----------------------------------------------------------------------===//
-def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
   [llvm_i1_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty],
-  [llvm_i64_ty], [IntrConvergent]
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_anyint_ty],
+  [llvm_anyint_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty],
-  [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]
+def int_amdgcn_if_break : Intrinsic<[llvm_anyint_ty],
+  [llvm_i1_ty, llvm_anyint_ty], [IntrNoMem, IntrConvergent]
 >;
 
 def int_amdgcn_loop : Intrinsic<[llvm_i1_ty],
-  [llvm_i64_ty], [IntrConvergent]
+  [llvm_anyint_ty], [IntrConvergent]
 >;
 
-def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_anyint_ty], [IntrConvergent]>;
 
 // Represent unreachable in a divergent region.
 def int_amdgcn_unreachable : Intrinsic<[], [], [IntrConvergent]>;
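As a sketch of how the now-overloaded control-flow intrinsics would be declared at both wavefront sizes (the .i64 declarations match the updated tests below; the .i32 forms are the assumed wave32 counterparts, not shown in this commit):

    declare { i1, i64 } @llvm.amdgcn.if.i64(i1)
    declare { i1, i64 } @llvm.amdgcn.else.i64.i64(i64)
    declare i64 @llvm.amdgcn.if.break.i64.i64(i1, i64)
    declare i1 @llvm.amdgcn.loop.i64(i64)
    declare void @llvm.amdgcn.end.cf.i64(i64)

    declare { i1, i32 } @llvm.amdgcn.if.i32(i1)
    declare { i1, i32 } @llvm.amdgcn.else.i32.i32(i32)
    declare i32 @llvm.amdgcn.if.break.i32.i32(i1, i32)
    declare i1 @llvm.amdgcn.loop.i32(i32)
    declare void @llvm.amdgcn.end.cf.i32(i32)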
@@ -249,7 +249,8 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
   // We need to know how many lanes are active within the wavefront, and we do
   // this by doing a ballot of active lanes.
   CallInst *const Ballot =
-      B.CreateIntrinsic(Intrinsic::amdgcn_icmp, {B.getInt32Ty()},
+      B.CreateIntrinsic(Intrinsic::amdgcn_icmp,
+                        {B.getInt64Ty(), B.getInt32Ty()},
                         {B.getInt32(1), B.getInt32(0), B.getInt32(33)});
 
   // We need to know how many lanes are active within the wavefront that are
@@ -1028,6 +1028,10 @@ public:
       std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
       const override;
 
+  bool isWave32() const {
+    return WavefrontSize == 32;
+  }
+
   /// \returns Maximum number of work groups per compute unit supported by the
   /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
@@ -12,11 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constant.h"
@@ -55,13 +57,13 @@ class SIAnnotateControlFlow : public FunctionPass {
 
   Type *Boolean;
   Type *Void;
-  Type *Int64;
+  Type *IntMask;
   Type *ReturnStruct;
 
   ConstantInt *BoolTrue;
   ConstantInt *BoolFalse;
   UndefValue *BoolUndef;
-  Constant *Int64Zero;
+  Constant *IntMaskZero;
 
   Function *If;
   Function *Else;
@@ -74,6 +76,8 @@ class SIAnnotateControlFlow : public FunctionPass {
 
   LoopInfo *LI;
 
+  void initialize(Module &M, const GCNSubtarget &ST);
+
   bool isUniform(BranchInst *T);
 
   bool isTopOfStack(BasicBlock *BB);
@@ -103,8 +107,6 @@ public:
 
   SIAnnotateControlFlow() : FunctionPass(ID) {}
 
-  bool doInitialization(Module &M) override;
-
   bool runOnFunction(Function &F) override;
 
   StringRef getPassName() const override { return "SI annotate control flow"; }
@@ -114,6 +116,7 @@ public:
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
   }
 };
@@ -124,31 +127,34 @@ INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
                       "Annotate SI Control Flow", false, false)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
                     "Annotate SI Control Flow", false, false)
 
 char SIAnnotateControlFlow::ID = 0;
 
 /// Initialize all the types and constants used in the pass
-bool SIAnnotateControlFlow::doInitialization(Module &M) {
+void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
   LLVMContext &Context = M.getContext();
 
   Void = Type::getVoidTy(Context);
   Boolean = Type::getInt1Ty(Context);
-  Int64 = Type::getInt64Ty(Context);
-  ReturnStruct = StructType::get(Boolean, Int64);
+  IntMask = ST.isWave32() ? Type::getInt32Ty(Context)
+                          : Type::getInt64Ty(Context);
+  ReturnStruct = StructType::get(Boolean, IntMask);
 
   BoolTrue = ConstantInt::getTrue(Context);
   BoolFalse = ConstantInt::getFalse(Context);
   BoolUndef = UndefValue::get(Boolean);
-  Int64Zero = ConstantInt::get(Int64, 0);
+  IntMaskZero = ConstantInt::get(IntMask, 0);
 
-  If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
-  Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
-  IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
-  Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
-  EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
-  return false;
+  If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if, { IntMask });
+  Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else,
+                                   { IntMask, IntMask });
+  IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break,
+                                      { IntMask, IntMask });
+  Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop, { IntMask });
+  EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf, { IntMask });
 }
 
 /// Is the branch condition uniform or did the StructurizeCFG pass
@@ -258,14 +264,14 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
     return;
 
   BasicBlock *Target = Term->getSuccessor(1);
-  PHINode *Broken = PHINode::Create(Int64, 0, "phi.broken", &Target->front());
+  PHINode *Broken = PHINode::Create(IntMask, 0, "phi.broken", &Target->front());
 
   Value *Cond = Term->getCondition();
   Term->setCondition(BoolTrue);
   Value *Arg = handleLoopCondition(Cond, Broken, L, Term);
 
   for (BasicBlock *Pred : predecessors(Target)) {
-    Value *PHIValue = Int64Zero;
+    Value *PHIValue = IntMaskZero;
     if (Pred == BB) // Remember the value of the previous iteration.
       PHIValue = Arg;
     // If the backedge from Pred to Target could be executed before the exit
@@ -316,6 +322,10 @@ bool SIAnnotateControlFlow::runOnFunction(Function &F) {
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
   DA = &getAnalysis<LegacyDivergenceAnalysis>();
+  TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+  const TargetMachine &TM = TPC.getTM<TargetMachine>();
+
+  initialize(*F.getParent(), TM.getSubtarget<GCNSubtarget>(F));
 
   for (df_iterator<BasicBlock *> I = df_begin(&F.getEntryBlock()),
        E = df_end(&F.getEntryBlock()); I != E; ++I) {
@@ -3839,7 +3839,6 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
 
   ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
 
-
   SDValue LHS = N->getOperand(1);
   SDValue RHS = N->getOperand(2);
 
@@ -3855,8 +3854,14 @@ static SDValue lowerICMPIntrinsic(const SITargetLowering &TLI,
 
   ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
 
-  return DAG.getNode(AMDGPUISD::SETCC, DL, VT, LHS, RHS,
-                     DAG.getCondCode(CCOpcode));
+  unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+  EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+
+  SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, DL, CCVT, LHS, RHS,
+                              DAG.getCondCode(CCOpcode));
+  if (VT.bitsEq(CCVT))
+    return SetCC;
+  return DAG.getZExtOrTrunc(SetCC, DL, VT);
 }
 
 static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
@@ -3882,8 +3887,13 @@ static SDValue lowerFCMPIntrinsic(const SITargetLowering &TLI,
 
   FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
   ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
-  return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src0,
-                     Src1, DAG.getCondCode(CCOpcode));
+  unsigned WavefrontSize = TLI.getSubtarget()->getWavefrontSize();
+  EVT CCVT = EVT::getIntegerVT(*DAG.getContext(), WavefrontSize);
+  SDValue SetCC = DAG.getNode(AMDGPUISD::SETCC, SL, CCVT, Src0,
+                              Src1, DAG.getCondCode(CCOpcode));
+  if (VT.bitsEq(CCVT))
+    return SetCC;
+  return DAG.getZExtOrTrunc(SetCC, SL, VT);
 }
 
 void SITargetLowering::ReplaceNodeResults(SDNode *N,
@@ -5394,6 +5404,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
                           SDLoc(DAG.getEntryNode()),
                           MFI->getArgInfo().WorkItemIDZ);
+  case Intrinsic::amdgcn_wavefrontsize:
+    return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
+                           SDLoc(Op), MVT::i32);
   case Intrinsic::amdgcn_s_buffer_load: {
     unsigned Cache = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
     return lowerSBuffer(VT, DL, Op.getOperand(1), Op.getOperand(2),
@@ -5598,6 +5611,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_fmad_ftz:
     return DAG.getNode(AMDGPUISD::FMAD_FTZ, DL, VT, Op.getOperand(1),
                        Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::amdgcn_if_break:
+    return SDValue(DAG.getMachineNode(AMDGPU::SI_IF_BREAK, DL, VT,
+                                      Op->getOperand(1), Op->getOperand(2)), 0);
+
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -6495,6 +6513,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                    M->getMemoryVT(), M->getMemOperand());
   }
 
+  case Intrinsic::amdgcn_end_cf:
+    return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
+                                      Op->getOperand(2), Chain), 0);
+
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
@@ -604,7 +604,12 @@ def : Pat <
 // TODO: we could add more variants for other types of conditionals
 
 def : Pat <
-  (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+  (i64 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
   (COPY $src) // Return the SGPRs representing i1 src
 >;
+
+def : Pat <
+  (i32 (int_amdgcn_icmp i1:$src, (i1 0), (i32 33))),
+  (COPY $src) // Return the SGPRs representing i1 src
+>;
 
@@ -3733,7 +3733,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
         break;
 
       Function *NewF =
-          Intrinsic::getDeclaration(II->getModule(), NewIID, SrcLHS->getType());
+          Intrinsic::getDeclaration(II->getModule(), NewIID,
+                                    { II->getType(),
+                                      SrcLHS->getType() });
       Value *Args[] = { SrcLHS, SrcRHS,
                         ConstantInt::get(CC->getType(), SrcPred) };
       CallInst *NewCall = Builder.CreateCall(NewF, Args);
@@ -38,8 +38,8 @@ sw.epilog:
 ; CHECK: load i8
 ; CHECK-NOT: {{ br }}
 ; CHECK: [[ICMP:%[a-zA-Z0-9._]+]] = icmp eq
-; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break(i1 [[ICMP]], i64 [[PHI]])
-; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop(i64 [[IF]])
+; CHECK: [[IF:%[a-zA-Z0-9._]+]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[ICMP]], i64 [[PHI]])
+; CHECK: [[LOOP:%[a-zA-Z0-9._]+]] = call i1 @llvm.amdgcn.loop.i64(i64 [[IF]])
 ; CHECK: br i1 [[LOOP]]
 
 sw.while:
@@ -15,12 +15,12 @@
 ; OPT: br label %Flow
 
 ; OPT: Flow:
-; OPT: call i64 @llvm.amdgcn.if.break(
-; OPT: call i1 @llvm.amdgcn.loop(i64
+; OPT: call i64 @llvm.amdgcn.if.break.i64.i64(
+; OPT: call i1 @llvm.amdgcn.loop.i64(i64
 ; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1
 
 ; OPT: bb9:
-; OPT: call void @llvm.amdgcn.end.cf(i64
+; OPT: call void @llvm.amdgcn.end.cf.i64(i64
 
 ; GCN-LABEL: {{^}}break_loop:
 ; GCN: s_mov_b64 [[OUTER_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}
@@ -84,12 +84,12 @@ bb9:
 ; OPT: Flow:
 ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
-; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
-; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
 ; OPT-NEXT: br i1 %1, label %bb9, label %bb1
 
 ; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
 ; OPT-NEXT: store volatile i32 7
 ; OPT-NEXT: ret void
 define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
@@ -138,12 +138,12 @@ bb9: ; preds = %Flow
 ; OPT: Flow:
 ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
-; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
-; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
 ; OPT-NEXT: br i1 %1, label %bb9, label %bb1
 
 ; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
 ; OPT-NEXT: store volatile i32 7
 ; OPT-NEXT: ret void
 define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
@@ -189,12 +189,12 @@ bb9: ; preds = %Flow
 ; OPT: Flow:
 ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
-; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
-; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
 ; OPT-NEXT: br i1 %1, label %bb9, label %bb1
 
 ; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
 ; OPT-NEXT: store volatile i32 7
 ; OPT-NEXT: ret void
 define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
@@ -239,12 +239,12 @@ bb9: ; preds = %Flow
 ; OPT: Flow:
 ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
-; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %tmp3, i64 %phi.broken)
-; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %0)
+; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
+; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
 ; OPT-NEXT: br i1 %1, label %bb9, label %bb1
 
 ; OPT: bb9: ; preds = %Flow
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %0)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
 ; OPT-NEXT: store volatile i32 7
 ; OPT-NEXT: ret void
 define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
@@ -294,12 +294,12 @@ bb9: ; preds = %Flow
 ; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
 ; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
 ; OPT-NEXT: %0 = xor i1 %tmp3, true
-; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
-; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
+; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken)
+; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
 ; OPT-NEXT: br i1 %2, label %bb9, label %bb1
 
 ; OPT: bb9:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
 ; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
 ; OPT-NEXT: ret void
 define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
@@ -9,7 +9,7 @@
 ; StructurizeCFG.
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0)
 ; IR: %2 = extractvalue { i1, i64 } %1, 0
 ; IR: %3 = extractvalue { i1, i64 } %1, 1
 ; IR: br i1 %2, label %LeafBlock1, label %Flow
@@ -17,7 +17,7 @@
 ; IR: Flow:
 ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3)
 ; IR: %7 = extractvalue { i1, i64 } %6, 0
 ; IR: %8 = extractvalue { i1, i64 } %6, 1
 ; IR: br i1 %7, label %LeafBlock, label %Flow1
@@ -30,8 +30,8 @@
 
 ; IR: Flow2:
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11)
 ; IR: %13 = extractvalue { i1, i64 } %12, 0
 ; IR: %14 = extractvalue { i1, i64 } %12, 1
 ; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
@@ -43,8 +43,8 @@
 ; IR: Flow1:
 ; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
 ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16)
 ; IR: %18 = extractvalue { i1, i64 } %17, 0
 ; IR: %19 = extractvalue { i1, i64 } %17, 1
 ; IR: br i1 %18, label %exit1, label %Flow2
@@ -54,7 +54,7 @@
 ; IR: br label %Flow2
 
 ; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14)
 ; IR: ret void
 
 
@@ -141,13 +141,13 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_unreachable_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0)
 
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3)
 
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11)
 ; IR: br i1 %13, label %exit0, label %UnifiedUnreachableBlock
 
 
@@ -203,7 +203,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR: {{^}}Flow:
 ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3)
 ; IR: br i1 %7, label %LeafBlock, label %Flow1
 
 ; IR: {{^}}LeafBlock:
@@ -218,8 +218,8 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 
 ; IR: Flow2:
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11)
 ; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
 
 ; IR: exit0:
@@ -229,8 +229,8 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR: {{^}}Flow1:
 ; IR: %15 = phi i1 [ %divergent.cond1, %LeafBlock ], [ %4, %Flow ]
 ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16)
 ; IR: %18 = extractvalue { i1, i64 } %17, 0
 ; IR: %19 = extractvalue { i1, i64 } %17, 1
 ; IR: br i1 %18, label %exit1, label %Flow2
@@ -240,7 +240,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR: br label %Flow2
 
 ; IR: UnifiedReturnBlock:
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14)
 ; IR: ret void
 define amdgpu_kernel void @multi_exit_region_divergent_ret_uniform_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
 entry:
@@ -279,17 +279,17 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_exit_region_uniform_ret_divergent_ret(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0)
 ; IR: br i1 %2, label %LeafBlock1, label %Flow
 
 ; IR: Flow:
 ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3)
 
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11)
 
 define amdgpu_kernel void @multi_exit_region_uniform_ret_divergent_ret(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2, i32 %arg3) #0 {
 entry:
@@ -330,11 +330,11 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-LABEL: @multi_divergent_region_exit_ret_ret_return_value(
 ; IR: Flow2:
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
 
 ; IR: UnifiedReturnBlock:
 ; IR: %UnifiedRetVal = phi float [ 2.000000e+00, %Flow2 ], [ 1.000000e+00, %exit0 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %14)
 ; IR: ret float %UnifiedRetVal
 define amdgpu_ps float @multi_divergent_region_exit_ret_ret_return_value(i32 %vgpr) #0 {
 entry:
@@ -402,17 +402,17 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 }
 
 ; IR-LABEL: @multi_divergent_region_exit_ret_unreachable(
-; IR: %1 = call { i1, i64 } @llvm.amdgcn.if(i1 %0)
+; IR: %1 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %0)
 
 ; IR: Flow:
 ; IR: %4 = phi i1 [ true, %LeafBlock1 ], [ false, %entry ]
 ; IR: %5 = phi i1 [ %10, %LeafBlock1 ], [ false, %entry ]
-; IR: %6 = call { i1, i64 } @llvm.amdgcn.else(i64 %3)
+; IR: %6 = call { i1, i64 } @llvm.amdgcn.else.i64.i64(i64 %3)
 
 ; IR: Flow2:
 ; IR: %11 = phi i1 [ false, %exit1 ], [ %15, %Flow1 ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %19)
-; IR: %12 = call { i1, i64 } @llvm.amdgcn.if(i1 %11)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %19)
+; IR: %12 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %11)
 ; IR: br i1 %13, label %exit0, label %UnifiedReturnBlock
 
 ; IR: exit0:
@@ -422,8 +422,8 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR: Flow1:
 ; IR: %15 = phi i1 [ %SwitchLeaf, %LeafBlock ], [ %4, %Flow ]
 ; IR: %16 = phi i1 [ %9, %LeafBlock ], [ %5, %Flow ]
-; IR: call void @llvm.amdgcn.end.cf(i64 %8)
-; IR: %17 = call { i1, i64 } @llvm.amdgcn.if(i1 %16)
+; IR: call void @llvm.amdgcn.end.cf.i64(i64 %8)
+; IR: %17 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %16)
 ; IR: %18 = extractvalue { i1, i64 } %17, 0
 ; IR: %19 = extractvalue { i1, i64 } %17, 1
 ; IR: br i1 %18, label %exit1, label %Flow2
@@ -434,7 +434,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-NEXT: br label %Flow2
 
 ; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %14)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
 entry:
@@ -490,7 +490,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
 ; IR-NEXT: br label %Flow2
 
 ; IR: UnifiedReturnBlock: ; preds = %exit0, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %14)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @indirect_multi_divergent_region_exit_ret_unreachable(i32 addrspace(1)* nocapture %arg0, i32 addrspace(1)* nocapture %arg1, i32 addrspace(1)* nocapture %arg2) #0 {
 entry:
@@ -645,7 +645,7 @@ uniform.ret:
 ; IR: br i1 %11, label %uniform.endif, label %uniform.ret0
 
 ; IR: UnifiedReturnBlock: ; preds = %Flow3, %Flow2
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %6)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %6)
 ; IR-NEXT: ret void
 define amdgpu_kernel void @uniform_complex_multi_ret_nest_in_divergent_triangle(i32 %arg0) #0 {
 entry:
@@ -691,7 +691,7 @@ divergent.ret:
 ; IR-NEXT: br label %UnifiedReturnBlock
 
 ; IR: UnifiedReturnBlock:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64
 ; IR-NEXT: ret void
 define amdgpu_kernel void @multi_divergent_unreachable_exit() #0 {
 bb:
@@ -5,7 +5,7 @@
 ; OPT: main_body:
 ; OPT: LOOP.outer:
 ; OPT: LOOP:
-; OPT: [[if:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if(
+; OPT: [[if:%[0-9]+]] = call { i1, i64 } @llvm.amdgcn.if.i64(
 ; OPT: [[if_exec:%[0-9]+]] = extractvalue { i1, i64 } [[if]], 1
 ;
 ; OPT: Flow:
@@ -13,9 +13,9 @@
 ; Ensure two if.break calls, for both the inner and outer loops
 
 ; OPT: call void @llvm.amdgcn.end.cf
-; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1
-; OPT-NEXT: call i1 @llvm.amdgcn.loop(i64
-; OPT-NEXT: call i64 @llvm.amdgcn.if.break(i1
+; OPT-NEXT: call i64 @llvm.amdgcn.if.break.i64.i64(i1
+; OPT-NEXT: call i1 @llvm.amdgcn.loop.i64(i64
+; OPT-NEXT: call i64 @llvm.amdgcn.if.break.i64.i64(i1
 ;
 ; OPT: Flow1:
@@ -13,7 +13,7 @@
 ; IR-NEXT: %phi.broken = phi i64 [ %3, %bb10 ], [ 0, %bb ]
 ; IR-NEXT: %tmp6 = phi i32 [ 0, %bb ], [ %tmp11, %bb10 ]
 ; IR-NEXT: %tmp7 = icmp eq i32 %tmp6, 1
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp7)
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp7)
 ; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
 ; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
 ; IR-NEXT: br i1 %1, label %bb8, label %Flow
@@ -24,14 +24,14 @@
 ; IR: bb10:
 ; IR-NEXT: %tmp11 = phi i32 [ %6, %Flow ]
 ; IR-NEXT: %tmp12 = phi i1 [ %5, %Flow ]
-; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break(i1 %tmp12, i64 %phi.broken)
-; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop(i64 %3)
+; IR-NEXT: %3 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp12, i64 %phi.broken)
+; IR-NEXT: %4 = call i1 @llvm.amdgcn.loop.i64(i64 %3)
 ; IR-NEXT: br i1 %4, label %bb23, label %bb5
 
 ; IR: Flow:
 ; IR-NEXT: %5 = phi i1 [ %tmp22, %bb4 ], [ true, %bb5 ]
 ; IR-NEXT: %6 = phi i32 [ %tmp21, %bb4 ], [ undef, %bb5 ]
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2)
 ; IR-NEXT: br label %bb10
 
 ; IR: bb13:
@@ -51,7 +51,7 @@
 ; IR-NEXT: br label %bb9
 
 ; IR: bb23:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %3)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %3)
 ; IR-NEXT: ret void
 
 ; GCN-LABEL: {{^}}reduced_nested_loop_conditions:
@@ -121,27 +121,27 @@ bb23: ; preds = %bb10
 ; IR-LABEL: @nested_loop_conditions(
 
 ; IR: Flow3:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %21)
-; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %14)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %21)
+; IR-NEXT: %0 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %14)
 ; IR-NEXT: %1 = extractvalue { i1, i64 } %0, 0
 ; IR-NEXT: %2 = extractvalue { i1, i64 } %0, 1
 ; IR-NEXT: br i1 %1, label %bb4.bb13_crit_edge, label %Flow4
 
 ; IR: Flow4:
 ; IR-NEXT: %3 = phi i1 [ true, %bb4.bb13_crit_edge ], [ false, %Flow3 ]
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %2)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %2)
 ; IR-NEXT: br label %Flow
 
 ; IR: Flow:
 ; IR-NEXT: %4 = phi i1 [ %3, %Flow4 ], [ true, %bb ]
-; IR-NEXT: %5 = call { i1, i64 } @llvm.amdgcn.if(i1 %4)
+; IR-NEXT: %5 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %4)
 ; IR-NEXT: %6 = extractvalue { i1, i64 } %5, 0
 ; IR-NEXT: %7 = extractvalue { i1, i64 } %5, 1
 ; IR-NEXT: br i1 %6, label %bb13, label %bb31
 
 ; IR: bb14:
 ; IR: %tmp15 = icmp eq i32 %tmp1037, 1
-; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if(i1 %tmp15)
+; IR-NEXT: %8 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %tmp15)
 
 ; IR: Flow1:
 ; IR-NEXT: %11 = phi <4 x i32> [ %tmp9, %bb21 ], [ undef, %bb14 ]
@@ -149,9 +149,9 @@ bb23: ; preds = %bb10
 ; IR-NEXT: %13 = phi i1 [ %18, %bb21 ], [ true, %bb14 ]
 ; IR-NEXT: %14 = phi i1 [ %18, %bb21 ], [ false, %bb14 ]
 ; IR-NEXT: %15 = phi i1 [ false, %bb21 ], [ true, %bb14 ]
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %10)
-; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break(i1 %13, i64 %phi.broken)
-; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop(i64 %16)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %10)
+; IR-NEXT: %16 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %13, i64 %phi.broken)
+; IR-NEXT: %17 = call i1 @llvm.amdgcn.loop.i64(i64 %16)
 ; IR-NEXT: br i1 %17, label %Flow2, label %bb14
 
 ; IR: bb21:
@@ -160,14 +160,14 @@ bb23: ; preds = %bb10
 ; IR-NEXT: br label %Flow1
 
 ; IR: Flow2:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %16)
-; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if(i1 %15)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %16)
+; IR-NEXT: %19 = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %15)
 ; IR-NEXT: %20 = extractvalue { i1, i64 } %19, 0
 ; IR-NEXT: %21 = extractvalue { i1, i64 } %19, 1
 ; IR-NEXT: br i1 %20, label %bb31.loopexit, label %Flow3
 
 ; IR: bb31:
-; IR-NEXT: call void @llvm.amdgcn.end.cf(i64 %7)
+; IR-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %7)
 ; IR-NEXT: store volatile i32 0, i32 addrspace(1)* undef
 ; IR-NEXT: ret void
@@ -3,8 +3,8 @@
 
 
 ; OPT-LABEL: @annotate_unreachable(
-; OPT: call { i1, i64 } @llvm.amdgcn.if(
-; OPT-NOT: call void @llvm.amdgcn.end.cf(
+; OPT: call { i1, i64 } @llvm.amdgcn.if.i64(
+; OPT-NOT: call void @llvm.amdgcn.end.cf
 
 
 ; GCN-LABEL: {{^}}annotate_unreachable:
@@ -17,17 +17,17 @@ define amdgpu_kernel void @multiple_backedges(i32 %arg, i32* %arg1) {
 ; OPT-NEXT: [[TMP4:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP5:%.*]], [[LOOP]] ], [ 0, [[LOOP_END]] ]
 ; OPT-NEXT: [[TMP5]] = add nsw i32 [[TMP4]], [[TMP]]
 ; OPT-NEXT: [[TMP6:%.*]] = icmp slt i32 [[ARG]], [[TMP5]]
-; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break(i1 [[TMP6]], i64 [[PHI_BROKEN]])
-; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP0]])
+; OPT-NEXT: [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[TMP6]], i64 [[PHI_BROKEN]])
+; OPT-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
 ; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_END]], label [[LOOP]]
 ; OPT: loop_end:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP0]])
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
 ; OPT-NEXT: [[EXIT:%.*]] = icmp sgt i32 [[TMP5]], [[TMP2]]
-; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break(i1 [[EXIT]], i64 [[PHI_BROKEN1]])
-; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop(i64 [[TMP7]])
+; OPT-NEXT: [[TMP7]] = call i64 @llvm.amdgcn.if.break.i64.i64(i1 [[EXIT]], i64 [[PHI_BROKEN1]])
+; OPT-NEXT: [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP7]])
 ; OPT-NEXT: br i1 [[TMP3]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
 ; OPT: loop_exit:
-; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 [[TMP7]])
+; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP7]])
 ; OPT-NEXT: [[TMP12:%.*]] = zext i32 [[TMP]] to i64
 ; OPT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[ARG1:%.*]], i64 [[TMP12]]
 ; OPT-NEXT: [[TMP14:%.*]] = addrspacecast i32* [[TMP13]] to i32 addrspace(1)*
@ -1628,19 +1628,19 @@ define float @fmed3_0_1_undef_f32() {
|
||||
; llvm.amdgcn.icmp
|
||||
; --------------------------------------------------------------------
|
||||
|
||||
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32 immarg) nounwind readnone convergent
|
||||
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32 immarg) nounwind readnone convergent
|
||||
declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32 immarg) nounwind readnone convergent
|
||||
declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32 immarg) nounwind readnone convergent
|
||||
declare i64 @llvm.amdgcn.icmp.i64.i64(i64, i64, i32 immarg) nounwind readnone convergent
|
||||
declare i64 @llvm.amdgcn.icmp.i64.i1(i1, i1, i32 immarg) nounwind readnone convergent
|
||||
|
||||
define i64 @invalid_icmp_code(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @invalid_icmp_code(
|
||||
; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31)
|
||||
; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A]], i32 [[B]], i32 42)
|
||||
; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 31)
|
||||
; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A]], i32 [[B]], i32 42)
|
||||
; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
|
||||
; CHECK-NEXT: ret i64 [[OR]]
|
||||
;
|
||||
%under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
|
||||
%over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
|
||||
%under = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 31)
|
||||
%over = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 42)
|
||||
%or = or i64 %under, %over
|
||||
ret i64 %or
|
||||
}
|
||||
@ -1649,7 +1649,7 @@ define i64 @icmp_constant_inputs_false() {
|
||||
; CHECK-LABEL: @icmp_constant_inputs_false(
|
||||
; CHECK-NEXT: ret i64 0
|
||||
;
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
|
||||
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 32)
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
@ -1658,236 +1658,236 @@ define i64 @icmp_constant_inputs_true() {
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) #5
|
||||
; CHECK-NEXT: ret i64 [[RESULT]]
|
||||
;
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
|
||||
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define i64 @icmp_constant_to_rhs_slt(i32 %x) {
|
||||
; CHECK-LABEL: @icmp_constant_to_rhs_slt(
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[X:%.*]], i32 9, i32 38)
|
||||
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[X:%.*]], i32 9, i32 38)
|
||||
; CHECK-NEXT: ret i64 [[RESULT]]
|
||||
;
|
||||
%result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40)
|
||||
%result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 %x, i32 40)
|
||||
ret i64 %result
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp ne i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 41)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp sle i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[A:%.*]], i64 [[B:%.*]], i32 34)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp ugt i64 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 0, i32 %zext.cmp, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 1)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 1)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp oeq float %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp une float %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
|
||||
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f64(double [[A:%.*]], double [[B:%.*]], i32 4)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f64(double [[A:%.*]], double [[B:%.*]], i32 4)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp olt double %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%sext.cmp = sext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 0, i32 33)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 33)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp slt i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
|
||||
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 14)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp oeq float %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
|
||||
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 2)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 2)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp ule float %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
|
||||
; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 13)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 13)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = fcmp ogt float %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%zext.cmp = zext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
|
||||
; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
|
||||
; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_COND]], i32 0, i32 33)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 0, i32 33)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%zext.cond = zext i1 %cond to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
|
||||
; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
|
||||
; CHECK-NEXT: [[ZEXT_COND:%.*]] = zext i1 [[COND:%.*]] to i32
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_COND]], i32 -1, i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_COND]], i32 -1, i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%zext.cond = zext i1 %cond to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cond, i32 -1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
|
||||
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
|
||||
; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_COND]], i32 1, i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 1, i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%sext.cond = sext i1 %cond to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
|
||||
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
|
||||
; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i32
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_COND]], i32 0, i32 33)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_COND]], i32 0, i32 33)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%sext.cond = sext i1 %cond to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cond, i32 -1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
|
||||
; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
|
||||
; CHECK-NEXT: [[SEXT_COND:%.*]] = sext i1 [[COND:%.*]] to i64
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[SEXT_COND]], i64 0, i32 33)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[SEXT_COND]], i64 0, i32 33)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%sext.cond = sext i1 %cond to i64
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i64(i64 %sext.cond, i64 -1, i32 32)
|
||||
ret i64 %mask
|
||||
}
|
||||
|
||||
@ -1896,46 +1896,46 @@ define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
|
||||
; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
|
||||
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
|
||||
; CHECK-NEXT: [[SEXT_CMP:%.*]] = sext i1 [[CMP]] to i32
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[SEXT_CMP]], i32 1, i32 32)
|
||||
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[SEXT_CMP]], i32 1, i32 32)
|
||||
; CHECK-NEXT: ret i64 [[MASK]]
|
||||
;
|
||||
%cmp = icmp eq i32 %a, %b
|
||||
%sext.cmp = sext i1 %cmp to i32
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
|
||||
%mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 1, i32 32)
|
||||
ret i64 %mask
|
||||
}

define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 32)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i32 %a, %b
  %sext.cmp = sext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
  ret i64 %mask
}

define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 39)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp sge i32 %a, %b
  %sext.cmp = sext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %sext.cmp, i32 -1, i32 32)
  ret i64 %mask
}

define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 38)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp sle i32 %a, %b
  %not = xor i1 %cmp, true
  %zext.cmp = zext i1 %not to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -1943,12 +1943,12 @@ define i64 @fold_icmp_ne_0_zext_icmp_eq_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i4 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -1956,23 +1956,23 @@ define i64 @fold_icmp_ne_0_zext_icmp_eq_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i8(
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 32)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i8 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_ne_0_zext_icmp_eq_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i16(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 32)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i16 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -1980,12 +1980,12 @@ define i64 @fold_icmp_ne_0_zext_icmp_eq_i36(i36 %a, i36 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i36(
; CHECK-NEXT: [[TMP1:%.*]] = zext i36 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP2:%.*]] = zext i36 [[B:%.*]] to i64
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i64(i64 [[TMP1]], i64 [[TMP2]], i32 32)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i36 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -1993,37 +1993,36 @@ define i64 @fold_icmp_ne_0_zext_icmp_eq_i128(i128 %a, i128 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i128(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i128 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f16(half %a, half %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f16(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f16(half [[A:%.*]], half [[B:%.*]], i32 1)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp oeq half %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f128(fp128 %a, fp128 %b) {
;
; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f128(
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[ZEXT_CMP:%.*]] = zext i1 [[CMP]] to i32
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i32(i32 [[ZEXT_CMP]], i32 0, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp oeq fp128 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -2031,12 +2030,12 @@ define i64 @fold_icmp_ne_0_zext_icmp_slt_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i4(
; CHECK-NEXT: [[TMP1:%.*]] = sext i4 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = sext i4 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i4 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -2044,23 +2043,23 @@ define i64 @fold_icmp_ne_0_zext_icmp_slt_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i8(
; CHECK-NEXT: [[TMP1:%.*]] = sext i8 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = sext i8 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 40)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i8 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_ne_0_zext_icmp_slt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_slt_i16(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 40)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i16 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -2068,12 +2067,12 @@ define i64 @fold_icmp_ne_0_zext_icmp_ult_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i4(
; CHECK-NEXT: [[TMP1:%.*]] = zext i4 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i4 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i4 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -2081,23 +2080,23 @@ define i64 @fold_icmp_ne_0_zext_icmp_ult_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i8(
; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[A:%.*]] to i16
; CHECK-NEXT: [[TMP2:%.*]] = zext i8 [[B:%.*]] to i16
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[TMP1]], i16 [[TMP2]], i32 36)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i8 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_i16(
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i16(i16 [[A:%.*]], i16 [[B:%.*]], i32 36)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i16 %a, %b
  %zext.cmp = zext i1 %cmp to i32
  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %zext.cmp, i32 0, i32 33)
  ret i64 %mask
}

@ -2106,232 +2105,231 @@ define i64 @fold_icmp_ne_0_zext_icmp_ult_i16(i16 %a, i16 %b) {
define i64 @fold_icmp_i1_ne_0_icmp_eq_i1(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i1(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i32 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ne_i1(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ne_i1(
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ne i32 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_sle_i1(i32 %a, i32 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_sle_i1(
; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp sle i32 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ugt_i64(i64 %a, i64 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ugt_i64(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ugt i64 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ult_swap_i64(i64 %a, i64 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_swap_i64(
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ugt i64 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 false, i1 %cmp, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 false, i1 %cmp, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f32(float %a, float %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f32(
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp oeq float %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_fcmp_une_f32(float %a, float %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_une_f32(
; CHECK-NEXT: [[CMP:%.*]] = fcmp une float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp une float %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_fcmp_olt_f64(double %a, double %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_olt_f64(
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp olt double %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_eq_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i4(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i4 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i4 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_eq_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i8(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i8 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_eq_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i16(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i16 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_eq_i36(i36 %a, i36 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i36(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i36 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i36 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_eq_i128(i128 %a, i128 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_eq_i128(
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp eq i128 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f16(half %a, half %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f16(
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp oeq half %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_fcmp_oeq_f128(fp128 %a, fp128 %b) {
;
; CHECK-LABEL: @fold_icmp_i1_ne_0_fcmp_oeq_f128(
; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = fcmp oeq fp128 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_slt_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i4(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i4 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i4 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_slt_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i8(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i8 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_slt_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_slt_i16(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp slt i16 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ult_i4(i4 %a, i4 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i4(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i4 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i4 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ult_i8(i8 %a, i8 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i8(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i8 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}

define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
; CHECK-LABEL: @fold_icmp_i1_ne_0_icmp_ult_i16(
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: [[MASK:%.*]] = call i64 @llvm.amdgcn.icmp.i64.i1(i1 [[CMP]], i1 false, i32 33)
; CHECK-NEXT: ret i64 [[MASK]]
;
  %cmp = icmp ult i16 %a, %b
  %mask = call i64 @llvm.amdgcn.icmp.i1(i1 %cmp, i1 false, i32 33)
  %mask = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %cmp, i1 false, i32 33)
  ret i64 %mask
}
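
; NOTE (editorial): the i1 overload above lets InstCombine fold a preceding
; compare directly into the intrinsic, turning it into a ballot of that i1
; value across the wave. A minimal sketch of the folded form, assuming the
; wave64 i64 mask used throughout these tests:
;
;   %c = icmp eq i32 %v, 0
;   %ballot = call i64 @llvm.amdgcn.icmp.i64.i1(i1 %c, i1 false, i32 33)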

@ -2339,17 +2337,17 @@ define i64 @fold_icmp_i1_ne_0_icmp_ult_i16(i16 %a, i16 %b) {
; llvm.amdgcn.fcmp
; --------------------------------------------------------------------

declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32 immarg) nounwind readnone convergent
declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32 immarg) nounwind readnone convergent

define i64 @invalid_fcmp_code(float %a, float %b) {
; CHECK-LABEL: @invalid_fcmp_code(
; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[A]], float [[B]], i32 16)
; CHECK-NEXT: [[UNDER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A:%.*]], float [[B:%.*]], i32 -1)
; CHECK-NEXT: [[OVER:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[A]], float [[B]], i32 16)
; CHECK-NEXT: [[OR:%.*]] = or i64 [[UNDER]], [[OVER]]
; CHECK-NEXT: ret i64 [[OR]]
;
  %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
  %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
  %under = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 -1)
  %over = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 16)
  %or = or i64 %under, %over
  ret i64 %or
}
@ -2358,7 +2356,7 @@ define i64 @fcmp_constant_inputs_false() {
; CHECK-LABEL: @fcmp_constant_inputs_false(
; CHECK-NEXT: ret i64 0
;
  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 1)
  ret i64 %result
}

@ -2367,16 +2365,16 @@ define i64 @fcmp_constant_inputs_true() {
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) #5
; CHECK-NEXT: ret i64 [[RESULT]]
;
  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
  ret i64 %result
}

define i64 @fcmp_constant_to_rhs_olt(float %x) {
; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.fcmp.i64.f32(float [[X:%.*]], float 4.000000e+00, i32 2)
; CHECK-NEXT: ret i64 [[RESULT]]
;
  %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 4.0, float %x, i32 4)
  ret i64 %result
}
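
; NOTE (editorial): with both inputs constant, InstCombine folds the compare
; intrinsic outright: a predicate false for every lane becomes a zero mask,
; and one true for every lane becomes a read of the exec register, as the
; llvm.read_register.i64(metadata !0) line above shows. A minimal sketch of
; the always-true case (condition code 4 is olt, and 2.0 < 4.0):
;
;   %r = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
;   ; folds to: %r = call i64 @llvm.read_register.i64(metadata !0)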

@ -123,22 +123,22 @@ define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en, i1 %bool) {
  ret void
}

declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32)
declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)

define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
; CHECK-NEXT: %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 %c)
  %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 %c)
  ret i64 %result
}

declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32)
declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32)
define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
; CHECK: immarg operand has non-immediate parameter
; CHECK-NEXT: i32 %c
; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
; CHECK-NEXT: %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 %c)
  %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float %a, float %b, i32 %c)
  ret i64 %result
}
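
; NOTE (editorial): the condition-code operand of both intrinsics is declared
; ImmArg, so the IR verifier rejects the non-constant %c operands above. A
; minimal well-formed call, assuming the constant code 32 (eq) used in the
; InstCombine tests:
;
;   %ok = call i64 @llvm.amdgcn.icmp.i64.i32(i32 %a, i32 %b, i32 32)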