mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-08 20:18:33 +00:00
[SelectionDAG] Codesize: don't expand SHIFT to SHIFT_PARTS
Instead, just generate a libcall. My motivating example on ARM was a simple `shl i64 %A, %B`, for which the code bloat is quite significant. For other targets that also accept __int128/i128, such as AArch64 and X86, it is also beneficial to generate a libcall in these cases when optimising for minsize. On these 64-bit targets, 64-bit shifts are of course unaffected, because the SHIFT/SHIFT_PARTS lowering operation action is not set to custom/expand. Differential Revision: https://reviews.llvm.org/D57386 llvm-svn: 352736
This commit is contained in:
parent
826d1a3623
commit
068d715728
@ -642,6 +642,13 @@ public:
|
||||
return RepRegClassCostForVT[VT.SimpleTy];
|
||||
}
|
||||
|
||||
/// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
|
||||
/// instructions, and false if a library call is preferred (e.g for code-size
|
||||
/// reasons).
|
||||
virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Return true if the target has native support for the specified value type.
|
||||
/// This means that it has a register that directly holds it without
|
||||
/// promotions or expansions.
|
||||
|
@ -2765,11 +2765,15 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
|
||||
}
|
||||
|
||||
// Next check to see if the target supports this SHL_PARTS operation or if it
|
||||
// will custom expand it.
|
||||
// will custom expand it. Don't lower this to SHL_PARTS when we optimise for
|
||||
// size, but create a libcall instead.
|
||||
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
|
||||
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
|
||||
if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
|
||||
Action == TargetLowering::Custom) {
|
||||
const bool LegalOrCustom =
|
||||
(Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
|
||||
Action == TargetLowering::Custom;
|
||||
|
||||
if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
|
||||
// Expand the subcomponents.
|
||||
SDValue LHSL, LHSH;
|
||||
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
|
||||
|
@ -469,6 +469,12 @@ public:
|
||||
return VT.getSizeInBits() >= 64; // vector 'bic'
|
||||
}
|
||||
|
||||
/// Expand the shift unless the function containing it is being optimised for
/// minimum size; returning false asks for the non-expanded lowering instead.
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
  const bool OptForMinSize =
      DAG.getMachineFunction().getFunction().optForMinSize();
  return !OptForMinSize;
}
|
||||
|
||||
bool shouldTransformSignedTruncationCheck(EVT XVT,
|
||||
unsigned KeptBits) const override {
|
||||
// For vectors, we don't have a preference..
|
||||
|
@ -567,6 +567,12 @@ class VectorType;
|
||||
return HasStandaloneRem;
|
||||
}
|
||||
|
||||
/// Returns false when the enclosing function is optimised for minimum size,
/// and true in every other case.
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
  return DAG.getMachineFunction().getFunction().optForMinSize() ? false : true;
}
|
||||
|
||||
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
|
||||
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
|
||||
|
||||
|
@ -831,6 +831,12 @@ namespace llvm {
|
||||
return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
|
||||
}
|
||||
|
||||
/// Shift expansion is allowed only when the current function is not being
/// optimised for minimum size.
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
  return !DAG.getMachineFunction().getFunction().optForMinSize();
}
|
||||
|
||||
bool shouldSplatInsEltVarIndex(EVT VT) const override;
|
||||
|
||||
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
|
||||
|
122
test/CodeGen/AArch64/shift_minsize.ll
Normal file
122
test/CodeGen/AArch64/shift_minsize.ll
Normal file
@ -0,0 +1,122 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
|
||||
|
||||
define i64 @f0(i64 %val, i64 %amt) minsize optsize {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: lsl x0, x0, x1
|
||||
; CHECK-NEXT: ret
|
||||
%res = shl i64 %val, %amt
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i32 @f1(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: lsl x0, x0, x1
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; CHECK-NEXT: ret
|
||||
%a = shl i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f2(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: asr x0, x0, x1
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; CHECK-NEXT: ret
|
||||
%a = ashr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f3(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: lsr x0, x0, x1
|
||||
; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
|
||||
; CHECK-NEXT: ret
|
||||
%a = lshr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: shl128:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||
; CHECK-NEXT: bl __ashlti3
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shl = shl i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shl to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shl, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: ashr128:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||
; CHECK-NEXT: bl __ashrti3
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shr = ashr i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: lshr128:
|
||||
; CHECK: // %bb.0: // %entry
|
||||
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: .cfi_offset w30, -16
|
||||
; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
|
||||
; CHECK-NEXT: bl __lshrti3
|
||||
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
|
||||
; CHECK-NEXT: ret
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shr = lshr i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
32
test/CodeGen/ARM/shift_minsize.ll
Normal file
32
test/CodeGen/ARM/shift_minsize.ll
Normal file
@ -0,0 +1,32 @@
|
||||
; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s
|
||||
|
||||
define i64 @f0(i64 %val, i64 %amt) minsize optsize {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: bl __aeabi_llsl
|
||||
%res = shl i64 %val, %amt
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i32 @f1(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: bl __aeabi_llsl
|
||||
%a = shl i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f2(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: bl __aeabi_lasr
|
||||
%a = ashr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f3(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: bl __aeabi_llsr
|
||||
%a = lshr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
134
test/CodeGen/X86/shift_minsize.ll
Normal file
134
test/CodeGen/X86/shift_minsize.ll
Normal file
@ -0,0 +1,134 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s
|
||||
|
||||
define i64 @f0(i64 %val, i64 %amt) minsize optsize {
|
||||
; CHECK-LABEL: f0:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; CHECK-NEXT: shlq %cl, %rax
|
||||
; CHECK-NEXT: retq
|
||||
%res = shl i64 %val, %amt
|
||||
ret i64 %res
|
||||
}
|
||||
|
||||
define i32 @f1(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f1:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; CHECK-NEXT: shlq %cl, %rax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = shl i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f2(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f2:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; CHECK-NEXT: sarq %cl, %rax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = ashr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define i32 @f3(i64 %x, i64 %y) minsize optsize {
|
||||
; CHECK-LABEL: f3:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: movq %rsi, %rcx
|
||||
; CHECK-NEXT: movq %rdi, %rax
|
||||
; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx
|
||||
; CHECK-NEXT: shrq %cl, %rax
|
||||
; CHECK-NEXT: # kill: def $eax killed $eax killed $rax
|
||||
; CHECK-NEXT: retq
|
||||
%a = lshr i64 %x, %y
|
||||
%b = trunc i64 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: shl128:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: movzbl %dl, %edx
|
||||
; CHECK-NEXT: callq __ashlti3
|
||||
; CHECK-NEXT: popq %rcx
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shl = shl i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shl to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shl, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: ashr128:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: callq __ashrti3
|
||||
; CHECK-NEXT: popq %rcx
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shr = ashr i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
|
||||
define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signext %y) minsize optsize {
|
||||
; CHECK-LABEL: lshr128:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pushq %rax
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 16
|
||||
; CHECK-NEXT: movzbl %dl, %edx
|
||||
; CHECK-NEXT: callq __lshrti3
|
||||
; CHECK-NEXT: popq %rcx
|
||||
; CHECK-NEXT: .cfi_def_cfa_offset 8
|
||||
; CHECK-NEXT: retq
|
||||
entry:
|
||||
%x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
|
||||
%x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
|
||||
%x.sroa.0.0.insert.ext = zext i64 %x.coerce0 to i128
|
||||
%x.sroa.0.0.insert.insert = or i128 %x.sroa.2.0.insert.shift, %x.sroa.0.0.insert.ext
|
||||
%conv = sext i8 %y to i32
|
||||
%sh_prom = zext i32 %conv to i128
|
||||
%shr = lshr i128 %x.sroa.0.0.insert.insert, %sh_prom
|
||||
%retval.sroa.0.0.extract.trunc = trunc i128 %shr to i64
|
||||
%retval.sroa.2.0.extract.shift = lshr i128 %shr, 64
|
||||
%retval.sroa.2.0.extract.trunc = trunc i128 %retval.sroa.2.0.extract.shift to i64
|
||||
%.fca.0.insert = insertvalue { i64, i64 } undef, i64 %retval.sroa.0.0.extract.trunc, 0
|
||||
%.fca.1.insert = insertvalue { i64, i64 } %.fca.0.insert, i64 %retval.sroa.2.0.extract.trunc, 1
|
||||
ret { i64, i64 } %.fca.1.insert
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user