mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-26 05:00:26 +00:00
Use MOVSSmr instead of EXTRACTPSmr when extracting vector element 0 for a
store, as it's smaller and faster.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@58483 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8fe95356dd
commit
d17cfbe1ca
@ -4194,11 +4194,15 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
|
|||||||
} else if (VT == MVT::f32) {
|
} else if (VT == MVT::f32) {
|
||||||
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
|
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
|
||||||
// the result back to FR32 register. It's only worth matching if the
|
// the result back to FR32 register. It's only worth matching if the
|
||||||
// result has a single use which is a store or a bitcast to i32.
|
// result has a single use which is a store or a bitcast to i32. And in
|
||||||
|
// the case of a store, it's not worth it if the index is a constant 0,
|
||||||
|
// because a MOVSSmr can be used instead, which is smaller and faster.
|
||||||
if (!Op.hasOneUse())
|
if (!Op.hasOneUse())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
SDNode *User = *Op.getNode()->use_begin();
|
SDNode *User = *Op.getNode()->use_begin();
|
||||||
if (User->getOpcode() != ISD::STORE &&
|
if ((User->getOpcode() != ISD::STORE ||
|
||||||
|
(isa<ConstantSDNode>(Op.getOperand(1)) &&
|
||||||
|
cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
|
||||||
(User->getOpcode() != ISD::BIT_CONVERT ||
|
(User->getOpcode() != ISD::BIT_CONVERT ||
|
||||||
User->getValueType(0) != MVT::i32))
|
User->getValueType(0) != MVT::i32))
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
|
; RUN: llvm-as < %s | llc -march=x86 -mcpu=penryn > %t
|
||||||
; RUN: not grep movd %t
|
; RUN: not grep movd %t
|
||||||
; RUN: not grep movss %t
|
; RUN: grep {movss %xmm} %t | count 1
|
||||||
; RUN: grep {extractps \\\$0, %xmm0, } %t
|
; RUN: grep {extractps \\\$1, %xmm0, } %t | count 1
|
||||||
; PR2647
|
; PR2647
|
||||||
|
|
||||||
external global float, align 16 ; <float*>:0 [#uses=2]
|
external global float, align 16 ; <float*>:0 [#uses=2]
|
||||||
@ -14,6 +14,14 @@ define internal void @""() nounwind {
|
|||||||
store float %4, float* @0, align 16
|
store float %4, float* @0, align 16
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
define internal void @""() nounwind {
|
||||||
|
load float* @0, align 16 ; <float>:1 [#uses=1]
|
||||||
|
insertelement <4 x float> undef, float %1, i32 1 ; <<4 x float>>:2 [#uses=1]
|
||||||
|
call <4 x float> @llvm.x86.sse.rsqrt.ss( <4 x float> %2 ) ; <<4 x float>>:3 [#uses=1]
|
||||||
|
extractelement <4 x float> %3, i32 1 ; <float>:4 [#uses=1]
|
||||||
|
store float %4, float* @0, align 16
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user