mirror of
https://github.com/RPCSX/llvm.git
synced 2025-02-03 11:08:32 +00:00
Optimized instruction sequence for sitofp operation on X86-32
Optimized sitofp i64 %x to double.

The current sequence

    movl %ecx, 8(%esp)
    movl %edx, 12(%esp)
    fildll 8(%esp)

is replaced with:

    movd %ecx, %xmm0
    movd %edx, %xmm1
    punpckldq %xmm1, %xmm0
    movq %xmm0, 8(%esp)

Differential Revision: http://reviews.llvm.org/D15946

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257285 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ca4af1ae34
commit
d6de44078b
@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
// Without SSE, i64->f64 goes through memory.
|
||||
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
|
||||
}
|
||||
}
|
||||
} else if (!Subtarget->is64Bit())
|
||||
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
|
||||
|
||||
// Scalar integer divide and remainder are lowered to use operations that
|
||||
// produce two results, to match the available instructions. This exposes
|
||||
@ -12672,13 +12673,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
|
||||
return Op;
|
||||
}
|
||||
|
||||
SDValue ValueToStore = Op.getOperand(0);
|
||||
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
|
||||
!Subtarget->is64Bit())
|
||||
// Bitcasting to f64 here allows us to do a single 64-bit store from
|
||||
// an SSE register, avoiding the store forwarding penalty that would come
|
||||
// with two 32-bit stores.
|
||||
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
|
||||
|
||||
unsigned Size = SrcVT.getSizeInBits()/8;
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
auto PtrVT = getPointerTy(MF.getDataLayout());
|
||||
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
|
||||
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
|
||||
SDValue Chain = DAG.getStore(
|
||||
DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
|
||||
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
|
||||
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
|
||||
false, 0);
|
||||
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
|
||||
@ -13051,7 +13060,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
|
||||
}
|
||||
|
||||
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
|
||||
SDValue ValueToStore = Op.getOperand(0);
|
||||
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
|
||||
// Bitcasting to f64 here allows us to do a single 64-bit store from
|
||||
// an SSE register, avoiding the store forwarding penalty that would come
|
||||
// with two 32-bit stores.
|
||||
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
|
||||
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
|
||||
StackSlot, MachinePointerInfo(),
|
||||
false, false, 0);
|
||||
// For i64 source, we need to add the appropriate power of 2 if the input
|
||||
@ -19536,24 +19551,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
|
||||
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
|
||||
MVT DstVT = Op.getSimpleValueType();
|
||||
|
||||
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
|
||||
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
|
||||
SrcVT == MVT::i64) {
|
||||
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
|
||||
if (DstVT != MVT::f64)
|
||||
// This conversion needs to be expanded.
|
||||
return SDValue();
|
||||
|
||||
SDValue InVec = Op->getOperand(0);
|
||||
SDValue Op0 = Op->getOperand(0);
|
||||
SmallVector<SDValue, 16> Elts;
|
||||
SDLoc dl(Op);
|
||||
unsigned NumElts = SrcVT.getVectorNumElements();
|
||||
MVT SVT = SrcVT.getVectorElementType();
|
||||
unsigned NumElts;
|
||||
MVT SVT;
|
||||
if (SrcVT.isVector()) {
|
||||
NumElts = SrcVT.getVectorNumElements();
|
||||
SVT = SrcVT.getVectorElementType();
|
||||
|
||||
// Widen the vector in input in the case of MVT::v2i32.
|
||||
// Example: from MVT::v2i32 to MVT::v4i32.
|
||||
SmallVector<SDValue, 16> Elts;
|
||||
for (unsigned i = 0, e = NumElts; i != e; ++i)
|
||||
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
|
||||
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
|
||||
DAG.getIntPtrConstant(i, dl)));
|
||||
|
||||
} else {
|
||||
assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
|
||||
"Unexpected source type in LowerBITCAST");
|
||||
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
|
||||
DAG.getIntPtrConstant(0, dl)));
|
||||
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
|
||||
DAG.getIntPtrConstant(1, dl)));
|
||||
NumElts = 2;
|
||||
SVT = MVT::i32;
|
||||
}
|
||||
// Explicitly mark the extra elements as Undef.
|
||||
Elts.append(NumElts, DAG.getUNDEF(SVT));
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
|
||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 13
|
||||
|
||||
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
|
||||
entry:
|
||||
|
@ -74,9 +74,16 @@ define x86_fp80 @s32_to_x(i32 %a) nounwind {
|
||||
}
|
||||
|
||||
; CHECK-LABEL: u64_to_f
|
||||
; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
|
||||
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512_32: fildll
|
||||
|
||||
; AVX512_64: vcvtusi2ssq
|
||||
|
||||
; SSE2_32: movq {{.*#+}} xmm0 = mem[0],zero
|
||||
; SSE2_32: movq %xmm0, {{[0-9]+}}(%esp)
|
||||
; SSE2_32: fildll
|
||||
|
||||
; SSE2_64: cvtsi2ssq
|
||||
; X87: fildll
|
||||
define float @u64_to_f(i64 %a) nounwind {
|
||||
@ -95,6 +102,24 @@ define float @s64_to_f(i64 %a) nounwind {
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: s64_to_f_2
|
||||
; SSE2_32: movd %ecx, %xmm0
|
||||
; SSE2_32: movd %eax, %xmm1
|
||||
; SSE2_32: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
|
||||
; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
|
||||
; SSE2_32: fildll {{[0-9]+}}(%esp)
|
||||
|
||||
; AVX512_32: vmovd %eax, %xmm0
|
||||
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512_32: fildll {{[0-9]+}}(%esp)
|
||||
|
||||
define float @s64_to_f_2(i64 %a) nounwind {
|
||||
%a1 = add i64 %a, 5
|
||||
%r = sitofp i64 %a1 to float
|
||||
ret float %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: u64_to_d
|
||||
; AVX512_32: vpunpckldq
|
||||
; AVX512_64: vcvtusi2sdq
|
||||
@ -117,6 +142,24 @@ define double @s64_to_d(i64 %a) nounwind {
|
||||
ret double %r
|
||||
}
|
||||
|
||||
; CHECK-LABEL: s64_to_d_2
|
||||
; SSE2_32: movd %ecx, %xmm0
|
||||
; SSE2_32: movd %eax, %xmm1
|
||||
; SSE2_32: punpckldq %xmm0, %xmm1
|
||||
; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
|
||||
; SSE2_32: fildll
|
||||
|
||||
; AVX512_32: vmovd %eax, %xmm0
|
||||
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
|
||||
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
|
||||
; AVX512_32: fildll
|
||||
|
||||
define double @s64_to_d_2(i64 %a) nounwind {
|
||||
%b = add i64 %a, 5
|
||||
%f = sitofp i64 %b to double
|
||||
ret double %f
|
||||
}
|
||||
|
||||
; CHECK-LABEL: u64_to_x
|
||||
; CHECK: fildll
|
||||
define x86_fp80 @u64_to_x(i64 %a) nounwind {
|
||||
|
Loading…
x
Reference in New Issue
Block a user