From 2982845bcce853027f051adcd698e30206ee7ca1 Mon Sep 17 00:00:00 2001 From: David Majnemer Date: Sat, 6 Aug 2016 08:16:00 +0000 Subject: [PATCH] [ValueTracking] Teach computeKnownBits about [su]min/max Reasoning about a select in terms of a min or max allows us to derive a tighter bound on the result. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@277914 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 51 ++++++++++++++++++- test/Transforms/InstCombine/sext.ll | 12 +++++ .../interleaved-accesses-pred-stores.ll | 6 +-- .../LoopVectorize/interleaved-accesses.ll | 8 +-- 4 files changed, 69 insertions(+), 8 deletions(-) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f2b40787443..c3a5e6af62b 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -950,14 +950,63 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero, KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); break; } - case Instruction::Select: + case Instruction::Select: { computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + Value *LHS, *RHS; + SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor; + if (SelectPatternResult::isMinOrMax(SPF)) { + computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q); + } else { + computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q); + computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q); + } + + unsigned MaxHighOnes = 0; + unsigned MaxHighZeros = 0; + if (SPF == SPF_SMAX) { + // If both sides are negative, the result is negative. + if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1]) + // We can derive a lower bound on the result by taking the max of the + // leading one bits. 
+ MaxHighOnes = + std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + // If either side is non-negative, the result is non-negative. + else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1]) + MaxHighZeros = 1; + } else if (SPF == SPF_SMIN) { + // If both sides are non-negative, the result is non-negative. + if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1]) + // We can derive an upper bound on the result by taking the max of the + // leading zero bits. + MaxHighZeros = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + // If either side is negative, the result is negative. + else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1]) + MaxHighOnes = 1; + } else if (SPF == SPF_UMAX) { + // We can derive a lower bound on the result by taking the max of the + // leading one bits. + MaxHighOnes = + std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes()); + } else if (SPF == SPF_UMIN) { + // We can derive an upper bound on the result by taking the max of the + // leading zero bits. + MaxHighZeros = + std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); + } + // Only known if known in both the LHS and RHS. 
KnownOne &= KnownOne2; KnownZero &= KnownZero2; + if (MaxHighOnes > 0) + KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes); + if (MaxHighZeros > 0) + KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros); break; + } case Instruction::FPTrunc: case Instruction::FPExt: case Instruction::FPToUI: diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll index f04afcc747b..4caa0f1fbb1 100644 --- a/test/Transforms/InstCombine/sext.ll +++ b/test/Transforms/InstCombine/sext.ll @@ -193,3 +193,15 @@ define i32 @test17(i1 %x) nounwind { ; CHECK-NEXT: [[TEST17:%.*]] = zext i1 %x to i32 ; CHECK-NEXT: ret i32 [[TEST17]] } + +define i32 @test18(i16 %x) { + %cmp = icmp slt i16 %x, 0 + %sel = select i1 %cmp, i16 0, i16 %x + %ext = sext i16 %sel to i32 + ret i32 %ext +; CHECK-LABEL: @test18( +; CHECK-NEXT: %[[cmp:.*]] = icmp slt i16 %x, 0 +; CHECK-NEXT: %[[sel:.*]] = select i1 %[[cmp]], i16 0, i16 %x +; CHECK-NEXT: %[[ext:.*]] = zext i16 %[[sel]] to i32 +; CHECK-NEXT: ret i32 %[[ext]] +} diff --git a/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll index 9ee6e6d529a..99a063b8c6e 100644 --- a/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -13,7 +13,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128" ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; ; CHECK: vector.body: ; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}} @@ -62,7 +62,7 @@ for.end: ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], 
%[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}} @@ -121,7 +121,7 @@ for.end: ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}} diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll index 868c3a2cdab..34998782aa8 100644 --- a/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -577,7 +577,7 @@ for.body: ; preds = %for.body, %entry ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}} ; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0 @@ -625,7 +625,7 @@ for.end: ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; CHECK: vector.body: ; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ] ; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}} @@ -678,7 +678,7 @@ for.end: ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 
%[[N]], %[[R]] ; CHECK: vector.body: ; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}} ; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0 @@ -726,7 +726,7 @@ for.end: ; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3 ; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0 ; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf -; CHECK: %n.vec = sub i64 %[[N]], %[[R]] +; CHECK: %n.vec = sub nsw i64 %[[N]], %[[R]] ; CHECK: vector.body: ; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ] ; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}