diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index b07aae466a3..13c38caa1e6 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1061,17 +1061,23 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, if (Depth == 0) return false; switch (I->getOpcode()) { + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + // Propagating an undefined shuffle mask element to integer div/rem is not + // allowed because those opcodes can create immediate undefined behavior + // from an undefined element in an operand. + if (llvm::any_of(Mask, [](int M){ return M == -1; })) + return false; + LLVM_FALLTHROUGH; case Instruction::Add: case Instruction::FAdd: case Instruction::Sub: case Instruction::FSub: case Instruction::Mul: case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: case Instruction::FRem: case Instruction::Shl: case Instruction::LShr: @@ -1092,9 +1098,7 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask, case Instruction::FPExt: case Instruction::GetElementPtr: { // Bail out if we would create longer vector ops. We could allow creating - // longer vector ops, but that may result in more expensive codegen. We - // would also need to limit the transform to avoid undefined behavior for - // integer div/rem. + // longer vector ops, but that may result in more expensive codegen. 
Type *ITy = I->getType(); if (ITy->isVectorTy() && Mask.size() > ITy->getVectorNumElements()) return false; diff --git a/test/Transforms/InstCombine/shufflevector-div-rem.ll b/test/Transforms/InstCombine/shufflevector-div-rem.ll index ee63a25653b..8364aab0aa2 100644 --- a/test/Transforms/InstCombine/shufflevector-div-rem.ll +++ b/test/Transforms/InstCombine/shufflevector-div-rem.ll @@ -8,7 +8,12 @@ ; extracting the second element in the vector). define i16 @test_srem_orig(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_srem_orig( -; CHECK-NEXT: ret i16 1 +; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = srem <2 x i16> [[SPLATINSERT]], +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[TMP1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] +; CHECK-NEXT: [[T3:%.*]] = extractelement <2 x i16> [[T2]], i32 1 +; CHECK-NEXT: ret i16 [[T3]] ; %splatinsert = insertelement <2 x i16> undef, i16 %a, i32 0 %splat = shufflevector <2 x i16> %splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer @@ -24,7 +29,11 @@ define i16 @test_srem_orig(i16 %a, i1 %cmp) { ; "evaluateInDifferentElementOrder". 
define <2 x i16> @test_srem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_srem( -; CHECK-NEXT: ret <2 x i16> +; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = srem <2 x i16> [[SPLATINSERT]], +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] +; CHECK-NEXT: ret <2 x i16> [[T2]] ; %splatinsert = insertelement <2 x i16> undef, i16 %a, i32 0 %t1 = srem <2 x i16> %splatinsert, @@ -35,7 +44,11 @@ define <2 x i16> @test_srem(i16 %a, i1 %cmp) { define <2 x i16> @test_urem(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_urem( -; CHECK-NEXT: ret <2 x i16> +; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = urem <2 x i16> [[SPLATINSERT]], +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] +; CHECK-NEXT: ret <2 x i16> [[T2]] ; %splatinsert = insertelement <2 x i16> undef, i16 %a, i32 0 %t1 = urem <2 x i16> %splatinsert, @@ -46,7 +59,11 @@ define <2 x i16> @test_urem(i16 %a, i1 %cmp) { define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_sdiv( -; CHECK-NEXT: ret <2 x i16> +; CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = sdiv <2 x i16> [[SPLATINSERT]], +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] +; CHECK-NEXT: ret <2 x i16> [[T2]] ; %splatinsert = insertelement <2 x i16> undef, i16 %a, i32 0 %t1 = sdiv <2 x i16> %splatinsert, @@ -57,7 +74,11 @@ define <2 x i16> @test_sdiv(i16 %a, i1 %cmp) { define <2 x i16> @test_udiv(i16 %a, i1 %cmp) { ; CHECK-LABEL: @test_udiv( -; CHECK-NEXT: ret <2 x i16> +; 
CHECK-NEXT: [[SPLATINSERT:%.*]] = insertelement <2 x i16> undef, i16 [[A:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = udiv <2 x i16> [[SPLATINSERT]], +; CHECK-NEXT: [[SPLAT_OP:%.*]] = shufflevector <2 x i16> [[T1]], <2 x i16> undef, <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = select i1 [[CMP:%.*]], <2 x i16> , <2 x i16> [[SPLAT_OP]] +; CHECK-NEXT: ret <2 x i16> [[T2]] ; %splatinsert = insertelement <2 x i16> undef, i16 %a, i32 0 %t1 = udiv <2 x i16> %splatinsert,