Optimized instruction sequence for sitofp operation on X86-32

Optimized sitofp i64 %x to double. The current sequence

movl %ecx, 8(%esp) 
movl %edx, 12(%esp) 
fildll 8(%esp)

is replaced with:

movd %ecx, %xmm0 
movd %edx, %xmm1 
punpckldq %xmm1, %xmm0 
movq %xmm0, 8(%esp)

Differential Revision: http://reviews.llvm.org/D15946



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@257285 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Elena Demikhovsky 2016-01-10 09:41:22 +00:00
parent ca4af1ae34
commit d6de44078b
3 changed files with 86 additions and 15 deletions

View File

@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
}
} else if (!Subtarget->is64Bit())
setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@ -12672,13 +12673,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
SDValue ValueToStore = Op.getOperand(0);
if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
!Subtarget->is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
@ -13051,7 +13060,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
SDValue ValueToStore = Op.getOperand(0);
if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
// Bitcasting to f64 here allows us to do a single 64-bit store from
// an SSE register, avoiding the store forwarding penalty that would come
// with two 32-bit stores.
ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
@ -19536,24 +19551,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
SrcVT == MVT::i64) {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
SDValue InVec = Op->getOperand(0);
SDLoc dl(Op);
unsigned NumElts = SrcVT.getVectorNumElements();
MVT SVT = SrcVT.getVectorElementType();
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
DAG.getIntPtrConstant(i, dl)));
SDLoc dl(Op);
unsigned NumElts;
MVT SVT;
if (SrcVT.isVector()) {
NumElts = SrcVT.getVectorNumElements();
SVT = SrcVT.getVectorElementType();
// Widen the vector in input in the case of MVT::v2i32.
// Example: from MVT::v2i32 to MVT::v4i32.
for (unsigned i = 0, e = NumElts; i != e; ++i)
Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
DAG.getIntPtrConstant(i, dl)));
} else {
assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
"Unexpected source type in LowerBITCAST");
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(0, dl)));
Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
DAG.getIntPtrConstant(1, dl)));
NumElts = 2;
SVT = MVT::i32;
}
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 13
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:

View File

@ -74,9 +74,16 @@ define x86_fp80 @s32_to_x(i32 %a) nounwind {
}
; CHECK-LABEL: u64_to_f
; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
; AVX512_64: vcvtusi2ssq
; SSE2_32: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2_32: movq %xmm0, {{[0-9]+}}(%esp)
; SSE2_32: fildll
; SSE2_64: cvtsi2ssq
; X87: fildll
define float @u64_to_f(i64 %a) nounwind {
@ -95,6 +102,24 @@ define float @s64_to_f(i64 %a) nounwind {
ret float %r
}
; CHECK-LABEL: s64_to_f_2
; SSE2_32: movd %ecx, %xmm0
; SSE2_32: movd %eax, %xmm1
; SSE2_32: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
; SSE2_32: fildll {{[0-9]+}}(%esp)
; AVX512_32: vmovd %eax, %xmm0
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll {{[0-9]+}}(%esp)
define float @s64_to_f_2(i64 %a) nounwind {
%a1 = add i64 %a, 5
%r = sitofp i64 %a1 to float
ret float %r
}
; CHECK-LABEL: u64_to_d
; AVX512_32: vpunpckldq
; AVX512_64: vcvtusi2sdq
@ -117,6 +142,24 @@ define double @s64_to_d(i64 %a) nounwind {
ret double %r
}
; CHECK-LABEL: s64_to_d_2
; SSE2_32: movd %ecx, %xmm0
; SSE2_32: movd %eax, %xmm1
; SSE2_32: punpckldq %xmm0, %xmm1
; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
; SSE2_32: fildll
; AVX512_32: vmovd %eax, %xmm0
; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
define double @s64_to_d_2(i64 %a) nounwind {
%b = add i64 %a, 5
%f = sitofp i64 %b to double
ret double %f
}
; CHECK-LABEL: u64_to_x
; CHECK: fildll
define x86_fp80 @u64_to_x(i64 %a) nounwind {