mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-04 20:20:54 +00:00
[IR] llvm::createMinMaxOp - create integer min/max intrinsics instead of icmp/sel
Based off D148215, when expanding a min/max reduction we should be creating min/max intrinsics directly instead of relying on instcombine to fold them back together. This patch handles integer min/max cases. Hopefully we can add floating point support soon (at least for fastmath/nnan cases) - but we're missing some of the plumbing to pass the correct FMF to the intrinsic at the moment. Differential Revision: https://reviews.llvm.org/D148221
This commit is contained in:
parent
be58b42a75
commit
aa754f7e0f
@ -353,6 +353,9 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
|
||||
SinkAndHoistLICMFlags &LICMFlags,
|
||||
OptimizationRemarkEmitter *ORE = nullptr);
|
||||
|
||||
/// Returns the min/max intrinsic used when expanding a min/max reduction.
|
||||
Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK);
|
||||
|
||||
/// Returns the comparison predicate used when expanding a min/max reduction.
|
||||
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
|
||||
|
||||
|
@ -893,6 +893,25 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Maps a min/max recurrence kind to the ID of the intrinsic that computes it.
///
/// \param RK the recurrence kind to translate; it must be one of UMin, UMax,
///           SMin, SMax, FMin or FMax.
/// \returns Intrinsic::umin/umax/smin/smax for the four integer kinds, and
///          Intrinsic::minnum/maxnum for FMin/FMax.
///
/// Any other recurrence kind reaches the default label and hits
/// llvm_unreachable.
Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
|
||||
switch (RK) {
|
||||
// Kinds without an explicit case below are treated as a caller error.
default:
|
||||
llvm_unreachable("Unknown min/max recurrence kind");
|
||||
case RecurKind::UMin:
|
||||
return Intrinsic::umin;
|
||||
case RecurKind::UMax:
|
||||
return Intrinsic::umax;
|
||||
case RecurKind::SMin:
|
||||
return Intrinsic::smin;
|
||||
case RecurKind::SMax:
|
||||
return Intrinsic::smax;
|
||||
// FMin/FMax map to minnum/maxnum. Note that the createMinMaxOp hunk visible
// in this diff only calls this function for integer or integer-vector types.
case RecurKind::FMin:
|
||||
return Intrinsic::minnum;
|
||||
case RecurKind::FMax:
|
||||
return Intrinsic::maxnum;
|
||||
}
|
||||
}
|
||||
|
||||
CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
|
||||
switch (RK) {
|
||||
default:
|
||||
@ -923,6 +942,13 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
|
||||
|
||||
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
|
||||
Value *Right) {
|
||||
Type *Ty = Left->getType();
|
||||
if (Ty->isIntOrIntVectorTy()) {
|
||||
// TODO: Add float minnum/maxnum support when FMF nnan is set.
|
||||
Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
|
||||
return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
|
||||
"rdx.minmax");
|
||||
}
|
||||
CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
|
||||
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
|
||||
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
|
||||
|
@ -225,9 +225,8 @@ define i64 @smax_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @smax_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
@ -239,9 +238,8 @@ define i64 @smin_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @smin_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.smin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
@ -253,9 +251,8 @@ define i64 @umax_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @umax_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umax.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
@ -267,9 +264,8 @@ define i64 @umin_i64(<2 x i64> %vec) {
|
||||
; CHECK-LABEL: @umin_i64(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <2 x i64> [[VEC:%.*]], <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ult <2 x i64> [[VEC]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <2 x i1> [[RDX_MINMAX_CMP]], <2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX_SELECT]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <2 x i64> @llvm.umin.v2i64(<2 x i64> [[VEC]], <2 x i64> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i64> [[RDX_MINMAX]], i32 0
|
||||
; CHECK-NEXT: ret i64 [[TMP0]]
|
||||
;
|
||||
entry:
|
||||
|
@ -137,9 +137,8 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
|
||||
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[ICMP:.*]] = icmp slt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
|
||||
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[SEL]])
|
||||
; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@ -171,9 +170,8 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
|
||||
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[ICMP:.*]] = icmp ugt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
|
||||
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[SEL]])
|
||||
; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
@ -141,9 +141,8 @@ define i32 @smin(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
|
||||
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[ICMP:.*]] = icmp slt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
|
||||
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[SEL]])
|
||||
; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
@ -175,9 +174,8 @@ define i32 @umax(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) {
|
||||
; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[ICMP1]], <vscale x 8 x i32> %[[LOAD1]]
|
||||
; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[ICMP2]], <vscale x 8 x i32> %[[LOAD2]]
|
||||
; CHECK: middle.block:
|
||||
; CHECK: %[[ICMP:.*]] = icmp ugt <vscale x 8 x i32> %[[SEL1]], %[[SEL2]]
|
||||
; CHECK-NEXT: %[[SEL:.*]] = select <vscale x 8 x i1> %[[ICMP]], <vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]]
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[SEL]])
|
||||
; CHECK: %[[RDX:.*]] = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> %[[SEL1]], <vscale x 8 x i32> %[[SEL2]])
|
||||
; CHECK-NEXT: call i32 @llvm.vector.reduce.umax.nxv8i32(<vscale x 8 x i32> %[[RDX]])
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
|
@ -265,9 +265,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
|
||||
; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; UNROLL-NO-IC: middle.block:
|
||||
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP17]], [[TMP18]]
|
||||
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP17]], <4 x i32> [[TMP18]]
|
||||
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
|
||||
; UNROLL-NO-IC-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP17]], <4 x i32> [[TMP18]])
|
||||
; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX]])
|
||||
; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
||||
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
|
||||
; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
@ -337,17 +336,16 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) {
|
||||
; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
|
||||
; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
|
||||
; UNROLL-NO-VF: middle.block:
|
||||
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
|
||||
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]]
|
||||
; UNROLL-NO-VF-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP15]], i32 [[TMP16]])
|
||||
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
|
||||
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
|
||||
; UNROLL-NO-VF: scalar.ph:
|
||||
; UNROLL-NO-VF-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
|
||||
; UNROLL-NO-VF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
|
||||
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
|
||||
; UNROLL-NO-VF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
|
||||
; UNROLL-NO-VF-NEXT: br label [[SCALAR_BODY:%.*]]
|
||||
; UNROLL-NO-VF: for.cond.cleanup.loopexit:
|
||||
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
|
||||
; UNROLL-NO-VF-NEXT: [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX]], [[MIDDLE_BLOCK]] ]
|
||||
; UNROLL-NO-VF-NEXT: br label [[FOR_COND_CLEANUP]]
|
||||
; UNROLL-NO-VF: for.cond.cleanup:
|
||||
; UNROLL-NO-VF-NEXT: [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
|
||||
|
@ -139,12 +139,10 @@ define i32 @smin_v4i32(ptr %p) #0 {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP2]]
|
||||
;
|
||||
entry:
|
||||
@ -192,12 +190,10 @@ define i32 @umax_v4i32(ptr %p) #0 {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4, !tbaa [[TBAA0]]
|
||||
; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
|
||||
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
|
||||
; CHECK-NEXT: [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
|
||||
; CHECK-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]])
|
||||
; CHECK-NEXT: [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[RDX_MINMAX2:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[RDX_SHUF3]])
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX2]], i32 0
|
||||
; CHECK-NEXT: ret i32 [[TMP2]]
|
||||
;
|
||||
entry:
|
||||
|
Loading…
Reference in New Issue
Block a user