[X86] Do not lower scalar sdiv/udiv to a shifts + mul sequence when optimizing for minsize

There are some cases where the mul sequence is smaller, but for the most part, using a div is preferable. This does not apply to vectors, since x86 doesn't have vector idiv, and a vector mul/shifts sequence ought to be smaller than a scalarized division. Differential Revision: http://reviews.llvm.org/D12082 llvm-svn: 245431
2024-12-03 00:47:07 +00:00 · 2015-08-19 11:21:43 +00:00 · 2015-08-19 11:21:43 +00:00 · d5d8fe4ef2
commit d5d8fe4ef2
parent fcab5e1388
4 changed files with 58 additions and 0 deletions
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26427,3 +26427,14 @@ int X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
 bool X86TargetLowering::isTargetFTOL() const {
  return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit();
 }
+
+bool X86TargetLowering::isIntDivCheap(EVT VT, bool OptSize) const {
+  // x86 integer division is slow, so it is normally replaced by a cheaper
+  // shifts + mul sequence. A scalar div instruction is usually the smaller
+  // encoding, though, so keep it when aggressively optimizing for code size.
+  if (!OptSize)
+    return false;
+  // x86 has no vector integer division: a vector div would be scalarized,
+  // which is larger than performing the alternative sequence in vector form.
+  return !VT.isVector();
+}
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -902,6 +902,8 @@ namespace llvm {
    /// \brief Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(EVT VT) const override;

+    bool isIntDivCheap(EVT VT, bool OptSize) const override;
+
  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -94,3 +94,35 @@ define i8 @test9(i8 %x) nounwind {
 ; CHECK: shrl $11
 ; CHECK: ret
 }
+
+define i32 @testsize1(i32 %x) minsize nounwind {
+entry:
+	%div = sdiv i32 %x, 32 ; signed, power-of-two divisor
+	ret i32 %div ; minsize: expect idivl rather than a shift sequence
+; CHECK-LABEL: testsize1:
+; CHECK: idivl
+}
+
+define i32 @testsize2(i32 %x) minsize nounwind {
+entry:
+	%div = sdiv i32 %x, 33 ; signed, non-power-of-two divisor
+	ret i32 %div ; minsize: expect idivl rather than a mul + shifts sequence
+; CHECK-LABEL: testsize2:
+; CHECK: idivl
+}
+
+define i32 @testsize3(i32 %x) minsize nounwind {
+entry:
+	%div = udiv i32 %x, 32 ; unsigned, power-of-two divisor
+	ret i32 %div ; expected to lower to a right shift (shrl), even at minsize
+; CHECK-LABEL: testsize3:
+; CHECK: shrl
+}
+
+define i32 @testsize4(i32 %x) minsize nounwind {
+entry:
+	%div = udiv i32 %x, 33 ; unsigned, non-power-of-two divisor
+	ret i32 %div ; minsize: expect a divl instruction
+; CHECK-LABEL: testsize4:
+; CHECK: divl
+}
--- a/test/CodeGen/X86/vec_sdiv_to_shift.ll
+++ b/test/CodeGen/X86/vec_sdiv_to_shift.ll
@@ -13,6 +13,19 @@ entry:
  ret <8 x i16> %0
 }

+; minsize vector sdiv by 32: still expect the vector shift/add sequence.
+define <8 x i16> @sdiv_vec8x16_minsize(<8 x i16> %var) minsize {
+entry:
+; CHECK: sdiv_vec8x16_minsize
+; CHECK: psraw  $15
+; CHECK: vpsrlw  $11
+; CHECK: vpaddw
+; CHECK: vpsraw  $5
+; CHECK: ret
+  %0 = sdiv <8 x i16> %var, <i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32, i16 32>
+  ret <8 x i16> %0
+}
+
 define <4 x i32> @sdiv_zero(<4 x i32> %var) {
 entry:
 ; CHECK: sdiv_zero