From 120da12eeb3c8e5b81c59605fb49ecabf3b168e3 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Fri, 27 Sep 2019 18:12:15 +0000
Subject: [PATCH] [InstCombine] Simplify shift-by-sext to shift-by-zext

Summary:
This is valid for any `sext` bitwidth pair:
```
Processing /tmp/opt.ll..

----------------------------------------
  %signed = sext %y
  %r = shl %x, %signed
  ret %r
=>
  %unsigned = zext %y
  %r = shl %x, %unsigned
  ret %r

Done: 2016
Optimization is correct!
```

(This is not so for funnel shifts; there it is illegal for e.g. i6->i7.)

The main motivation is the C++ semantics:
```
int shl(int a, char b) {
    return a << b;
}
```
which ends up as
```
%3 = sext i8 %1 to i32
%4 = shl i32 %0, %3
```
https://godbolt.org/z/0jgqUq
which is, as this shows, too pessimistic.

There is another problem here: we can only do the fold if the `sext`
has a single use. But we can trivially have cases where several shifts
share the same sign-extended shift amount. This should be resolved
later.

Reviewers: spatel, nikic, RKSimon

Reviewed By: spatel

Subscribers: efriedma, hiraditya, nlopes, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68103

llvm-svn: 373106
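As a sanity check of the claim above, here is a minimal standalone C++ sketch (illustrative only, not part of this patch; the harness and names are made up): every shift amount that keeps an i32 shift well-defined lies in [0, 31], and such values sign- and zero-extend identically, so the rewrite cannot change a defined result.
```
// Minimal standalone check (illustrative sketch, not from the patch):
// for every i8 shift amount in the well-defined range of an i32 shift
// (0..31), sign-extension and zero-extension to i32 agree bit-for-bit.
// Amounts outside this range make the shift poison in LLVM IR, so the
// sext -> zext rewrite does not need to preserve their behavior.
#include <cassert>
#include <cstdint>

int main() {
  for (int8_t Shamt = 0; Shamt < 32; ++Shamt) {
    uint32_t SExt = static_cast<uint32_t>(static_cast<int32_t>(Shamt)); // sext i8 -> i32
    uint32_t ZExt = static_cast<uint32_t>(static_cast<uint8_t>(Shamt)); // zext i8 -> i32
    assert(SExt == ZExt);
  }
  return 0;
}
```
For plain shifts this argument works for any `sext` bitwidth pair; for funnel shifts the amount is taken modulo the bitwidth, so for non-power-of-two widths such as i6->i7 the differing high bits of the two extensions become observable, which is why the fold is invalid there.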
---
 .../InstCombine/InstCombineShifts.cpp    |  7 ++++++
 test/Transforms/InstCombine/load-cmp.ll  |  2 +-
 .../InstCombine/shift-by-signext.ll      | 24 +++++++++----------
 3 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index c88827f916c..ab1e484df93 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -241,6 +241,13 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   assert(Op0->getType() == Op1->getType());
 
+  // If the shift amount is a one-use `sext`, we can demote it to `zext`.
+  Value *Y;
+  if (match(Op1, m_OneUse(m_SExt(m_Value(Y))))) {
+    Value *NewExt = Builder.CreateZExt(Y, I.getType(), Op1->getName());
+    return BinaryOperator::Create(I.getOpcode(), Op0, NewExt);
+  }
+
   // See if we can fold away this shift.
   if (SimplifyDemandedInstructionBits(I))
     return &I;
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index ace1fb0ad26..7b24ce309a9 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -105,7 +105,7 @@ define i1 @test4(i32 %X) {
 
 define i1 @test4_i16(i16 %X) {
 ; CHECK-LABEL: @test4_i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[X:%.*]] to i32
 ; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 933, [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP3]], 0
diff --git a/test/Transforms/InstCombine/shift-by-signext.ll b/test/Transforms/InstCombine/shift-by-signext.ll
index 9d6cf90dc19..b049dfac6f8 100644
--- a/test/Transforms/InstCombine/shift-by-signext.ll
+++ b/test/Transforms/InstCombine/shift-by-signext.ll
@@ -6,8 +6,8 @@
 
 define i32 @t0_shl(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t0_shl(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = shl i32 [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -16,8 +16,8 @@ define i32 @t0_shl(i32 %x, i8 %shamt) {
 }
 define i32 @t1_lshr(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t1_lshr(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -26,8 +26,8 @@ define i32 @t1_lshr(i32 %x, i8 %shamt) {
 }
 define i32 @t2_ashr(i32 %x, i8 %shamt) {
 ; CHECK-LABEL: @t2_ashr(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext i8 [[SHAMT:%.*]] to i32
-; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext i8 [[SHAMT:%.*]] to i32
+; CHECK-NEXT:    [[R:%.*]] = ashr i32 [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
   %shamt_wide = sext i8 %shamt to i32
@@ -37,8 +37,8 @@ define i32 @t2_ashr(i32 %x, i8 %shamt) {
 
 define <2 x i32> @t3_vec_shl(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t3_vec_shl(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
@@ -47,8 +47,8 @@ define <2 x i32> @t3_vec_shl(<2 x i32> %x, <2 x i8> %shamt) {
 }
 define <2 x i32> @t4_vec_lshr(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t4_vec_lshr(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
@@ -57,8 +57,8 @@ define <2 x i32> @t4_vec_lshr(<2 x i32> %x, <2 x i8> %shamt) {
 }
 define <2 x i32> @t5_vec_ashr(<2 x i32> %x, <2 x i8> %shamt) {
 ; CHECK-LABEL: @t5_vec_ashr(
-; CHECK-NEXT:    [[SHAMT_WIDE:%.*]] = sext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
-; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], [[SHAMT_WIDE]]
+; CHECK-NEXT:    [[SHAMT_WIDE1:%.*]] = zext <2 x i8> [[SHAMT:%.*]] to <2 x i32>
+; CHECK-NEXT:    [[R:%.*]] = ashr <2 x i32> [[X:%.*]], [[SHAMT_WIDE1]]
 ; CHECK-NEXT:    ret <2 x i32> [[R]]
 ;
   %shamt_wide = sext <2 x i8> %shamt to <2 x i32>
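A note on the one-use restriction mentioned in the summary, with a hypothetical C++ example (not from this patch): when a single sign-extended amount feeds more than one shift, the `m_OneUse` check blocks the fold, so both shifts keep the wide `sext`.
```
// Hypothetical illustration of the remaining limitation: after common
// subexpression elimination, a single sext of `s` (i8 -> i32) typically
// feeds both shifts below, so the one-use-gated fold fires on neither.
int shl_xor_shr(int a, int b, char s) {
  return (a << s) ^ (b >> s);
}
```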