mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-30 15:10:33 +00:00
[X86][AVX] Allow 32-bit targets to peek through subvectors to extract constant splats for vXi64 shifts.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@303009 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dc5d066e45
commit
8390fe6ccf
@ -21888,10 +21888,19 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
|
||||
}
|
||||
|
||||
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
|
||||
// TODO: Replace constant extraction with getTargetConstantBitsFromNode.
|
||||
if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
|
||||
(VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
|
||||
(Subtarget.hasAVX512() && VT == MVT::v8i64))) {
|
||||
|
||||
// AVX1 targets may be extracting a 128-bit vector from a 256-bit constant.
|
||||
unsigned SubVectorScale = 1;
|
||||
if (Amt.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
|
||||
SubVectorScale =
|
||||
Amt.getOperand(0).getValueSizeInBits() / Amt.getValueSizeInBits();
|
||||
Amt = Amt.getOperand(0);
|
||||
}
|
||||
|
||||
// Peek through any splat that was introduced for i64 shift vectorization.
|
||||
int SplatIndex = -1;
|
||||
if (ShuffleVectorSDNode *SVN = dyn_cast<ShuffleVectorSDNode>(Amt.getNode()))
|
||||
@ -21908,7 +21917,7 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
Amt = Amt.getOperand(0);
|
||||
unsigned Ratio = Amt.getSimpleValueType().getVectorNumElements() /
|
||||
VT.getVectorNumElements();
|
||||
(SubVectorScale * VT.getVectorNumElements());
|
||||
unsigned RatioInLog2 = Log2_32_Ceil(Ratio);
|
||||
uint64_t ShiftAmt = 0;
|
||||
unsigned BaseOp = (SplatIndex < 0 ? 0 : SplatIndex * Ratio);
|
||||
|
@ -176,16 +176,9 @@ define <2 x double> @signbits_ashr_concat_ashr_extract_sitofp(<2 x i64> %a0, <4
|
||||
; X32-NEXT: vpsrad $16, %xmm0, %xmm1
|
||||
; X32-NEXT: vpsrlq $16, %xmm0, %xmm0
|
||||
; X32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
|
||||
; X32-NEXT: vmovaps {{.*#+}} ymm1 = [16,0,16,0,16,0,16,0]
|
||||
; X32-NEXT: vextractf128 $1, %ymm1, %xmm1
|
||||
; X32-NEXT: vmovdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
|
||||
; X32-NEXT: vpsrlq %xmm1, %xmm2, %xmm2
|
||||
; X32-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
||||
; X32-NEXT: vpxor %xmm2, %xmm0, %xmm0
|
||||
; X32-NEXT: vpsubq %xmm2, %xmm0, %xmm0
|
||||
; X32-NEXT: vpsrlq $16, %xmm0, %xmm0
|
||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; X32-NEXT: vcvtdq2pd %xmm0, %xmm0
|
||||
; X32-NEXT: vzeroupper
|
||||
; X32-NEXT: retl
|
||||
;
|
||||
; X64-LABEL: signbits_ashr_concat_ashr_extract_sitofp:
|
||||
|
@ -1719,19 +1719,14 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; X32-AVX1-LABEL: splatconstant_shift_v4i64:
|
||||
; X32-AVX1: # BB#0:
|
||||
; X32-AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; X32-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648]
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm4
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm2, %xmm5, %xmm2
|
||||
; X32-AVX1-NEXT: vpxor %xmm4, %xmm2, %xmm2
|
||||
; X32-AVX1-NEXT: vpsubq %xmm4, %xmm2, %xmm2
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm1, %xmm3, %xmm3
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vpsubq %xmm3, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; X32-AVX1-NEXT: vpsrad $7, %xmm1, %xmm2
|
||||
; X32-AVX1-NEXT: vpsrlq $7, %xmm1, %xmm1
|
||||
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
|
||||
; X32-AVX1-NEXT: vpsrad $7, %xmm0, %xmm2
|
||||
; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7]
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; X32-AVX1-NEXT: retl
|
||||
;
|
||||
; X32-AVX2-LABEL: splatconstant_shift_v4i64:
|
||||
|
@ -1355,12 +1355,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; X32-AVX1-LABEL: splatconstant_shift_v4i64:
|
||||
; X32-AVX1: # BB#0:
|
||||
; X32-AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm2, %xmm3, %xmm2
|
||||
; X32-AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm1
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X32-AVX1-NEXT: vpsrlq $7, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX1-NEXT: retl
|
||||
;
|
||||
; X32-AVX2-LABEL: splatconstant_shift_v4i64:
|
||||
|
@ -1195,12 +1195,10 @@ define <4 x i64> @splatconstant_shift_v4i64(<4 x i64> %a) nounwind {
|
||||
;
|
||||
; X32-AVX1-LABEL: splatconstant_shift_v4i64:
|
||||
; X32-AVX1: # BB#0:
|
||||
; X32-AVX1-NEXT: vmovdqa {{.*#+}} ymm1 = [7,0,7,0,7,0,7,0]
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
|
||||
; X32-AVX1-NEXT: vpsllq %xmm2, %xmm3, %xmm2
|
||||
; X32-AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
|
||||
; X32-AVX1-NEXT: vpsllq $7, %xmm0, %xmm1
|
||||
; X32-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
|
||||
; X32-AVX1-NEXT: vpsllq $7, %xmm0, %xmm0
|
||||
; X32-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; X32-AVX1-NEXT: retl
|
||||
;
|
||||
; X32-AVX2-LABEL: splatconstant_shift_v4i64:
|
||||
|
Loading…
Reference in New Issue
Block a user