[InstCombine] simplify masked load intrinsics with all ones or zeros masks

A masked load with a zero mask means there's no load.
A masked load with an allOnes mask means it's a normal vector load.

Differential Revision: http://reviews.llvm.org/D16691



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@259369 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2016-02-01 17:00:10 +00:00
parent f4cc19cd22
commit 0a9644c134
2 changed files with 32 additions and 21 deletions

View File

@ -753,6 +753,26 @@ static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
return nullptr;
}
/// Try to fold a call to the masked load intrinsic whose mask operand is a
/// constant.
///
/// Operands of the call, as used here: 0 = pointer, 1 = alignment,
/// 2 = mask, 3 = passthru.
///
/// \returns the passthru operand when the mask is all zeros, a newly built
/// aligned load of the pointer operand when the mask is all ones, and nullptr
/// when the mask is not a constant or is any other constant.
static Value *simplifyMaskedLoad(const IntrinsicInst &II,
                                 InstCombiner::BuilderTy &Builder) {
  if (auto *Mask = dyn_cast<Constant>(II.getArgOperand(2))) {
    // No lane is enabled: nothing is loaded, so the result is the passthru.
    if (Mask->isNullValue())
      return II.getArgOperand(3);

    // Every lane is enabled: equivalent to an ordinary vector load that
    // carries the alignment given to the intrinsic.
    if (Mask->isAllOnesValue()) {
      auto *AlignArg = cast<ConstantInt>(II.getArgOperand(1));
      unsigned AlignVal = AlignArg->getZExtValue();
      return Builder.CreateAlignedLoad(II.getArgOperand(0), AlignVal,
                                       "unmaskedload");
    }
  }

  // Non-constant or mixed mask: leave the call untouched.
  return nullptr;
}
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallSite to do the heavy
/// lifting.
@ -877,6 +897,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::masked_load:
if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II, *Builder))
return ReplaceInstUsesWith(CI, SimplifiedMaskedOp);
break;
// TODO: Handle the other masked ops.
// case Intrinsic::masked_store:
// case Intrinsic::masked_gather:
// case Intrinsic::masked_scatter:
case Intrinsic::powi:
if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0

View File

@ -2,15 +2,13 @@
declare <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
; FIXME: All of these could be simplified.
define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
ret <2 x double> %res
; CHECK-LABEL: @load_zeromask(
; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
; CHECK-NEXT ret <2 x double> %res
; CHECK-NEXT: ret <2 x double> %passthru
}
define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
@ -18,24 +16,7 @@ define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
ret <2 x double> %res
; CHECK-LABEL: @load_onemask(
; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 true, i1 true>, <2 x double> %passthru)
; CHECK-NEXT: %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 2
; CHECK-NEXT: ret <2 x double> %unmaskedload
}
define <2 x double> @load_onesetbitmask1(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 3, <2 x i1> <i1 0, i1 1>, <2 x double> %passthru)
ret <2 x double> %res
; CHECK-LABEL: @load_onesetbitmask1(
; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 3, <2 x i1> <i1 false, i1 true>, <2 x double> %passthru)
; CHECK-NEXT ret <2 x double> %res
}
define <2 x double> @load_onesetbitmask2(<2 x double>* %ptr, <2 x double> %passthru) {
%res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 0>, <2 x double> %passthru)
ret <2 x double> %res
; CHECK-LABEL: @load_onesetbitmask2(
; CHECK-NEXT: %res = call <2 x double> @llvm.masked.load.v2f64(<2 x double>* %ptr, i32 4, <2 x i1> <i1 true, i1 false>, <2 x double> %passthru)
; CHECK-NEXT ret <2 x double> %res
}