[X86][SSE] Use SplitOpsAndApply to improve HADD/HSUB lowering
Improve AVX1 256-bit vector HADD/HSUB matching by using SplitOpsAndApply to split into 128-bit instructions.

llvm-svn: 337568
commit f7afa0a451
parent 7279a94c5c
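For background: SplitOpsAndApply splits a wide vector operation into legal subvector-sized pieces and concatenates the results. On AVX1, which offers vphadd/vphsub only in 128-bit form, a 256-bit integer HADD/HSUB therefore lowers as two 128-bit ops. A minimal standalone C++ sketch of the idea, using plain arrays in place of SelectionDAG nodes (hadd128/hadd256 are illustrative names, not LLVM APIs):

#include <array>
#include <cstdio>

// Scalar model of a 128-bit vphaddd: {a0+a1, a2+a3, b0+b1, b2+b3}.
static std::array<int, 4> hadd128(const std::array<int, 4> &A,
                                  const std::array<int, 4> &B) {
  return {A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]};
}

// Split-and-apply: build the 256-bit (8 x i32) HADD from two 128-bit
// halves and concatenate, mirroring what SplitOpsAndApply does with
// SelectionDAG nodes on AVX1.
static std::array<int, 8> hadd256(const std::array<int, 8> &A,
                                  const std::array<int, 8> &B) {
  std::array<int, 4> ALo{A[0], A[1], A[2], A[3]}, AHi{A[4], A[5], A[6], A[7]};
  std::array<int, 4> BLo{B[0], B[1], B[2], B[3]}, BHi{B[4], B[5], B[6], B[7]};
  std::array<int, 4> Lo = hadd128(ALo, BLo); // low 128-bit lane
  std::array<int, 4> Hi = hadd128(AHi, BHi); // high 128-bit lane
  return {Lo[0], Lo[1], Lo[2], Lo[3], Hi[0], Hi[1], Hi[2], Hi[3]};
}

int main() {
  std::array<int, 8> A{0, 1, 2, 3, 4, 5, 6, 7};
  std::array<int, 8> B{8, 9, 10, 11, 12, 13, 14, 15};
  for (int V : hadd256(A, B))
    std::printf("%d ", V); // prints: 1 5 17 21 9 13 25 29
  std::printf("\n");
  return 0;
}

The builder lambdas in the diff below play the role of hadd128: SplitOpsAndApply invokes them at whatever width is legal and stitches the pieces back together.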
@@ -38871,10 +38871,16 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
     return MAdd;

   // Try to synthesize horizontal adds from adds of shuffles.
-  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
-      isHorizontalBinOp(Op0, Op1, true))
-    return DAG.getNode(X86ISD::HADD, SDLoc(N), VT, Op0, Op1);
+  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
+       VT == MVT::v8i32) &&
+      Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, true)) {
+    auto HADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+                          ArrayRef<SDValue> Ops) {
+      return DAG.getNode(X86ISD::HADD, DL, Ops[0].getValueType(), Ops);
+    };
+    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
+                            HADDBuilder);
+  }

   if (SDValue V = combineIncDecVector(N, DAG))
     return V;
@@ -38996,10 +39002,16 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,

   // Try to synthesize horizontal subs from subs of shuffles.
   EVT VT = N->getValueType(0);
-  if (((Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32)) ||
-       (Subtarget.hasInt256() && (VT == MVT::v16i16 || VT == MVT::v8i32))) &&
-      isHorizontalBinOp(Op0, Op1, false))
-    return DAG.getNode(X86ISD::HSUB, SDLoc(N), VT, Op0, Op1);
+  if ((VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v16i16 ||
+       VT == MVT::v8i32) &&
+      Subtarget.hasSSSE3() && isHorizontalBinOp(Op0, Op1, false)) {
+    auto HSUBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
+                          ArrayRef<SDValue> Ops) {
+      return DAG.getNode(X86ISD::HSUB, DL, Ops[0].getValueType(), Ops);
+    };
+    return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, {Op0, Op1},
+                            HSUBBuilder);
+  }

   if (SDValue V = combineIncDecVector(N, DAG))
     return V;
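The test updates that follow show the resulting AVX1 codegen. As background, isHorizontalBinOp (called above, not part of this diff) accepts an add/sub whose operands shuffle the even and the odd elements of the same sources. A simplified C++ sketch of that mask check, an assumption-level illustration rather than the actual LLVM implementation:

#include <cstdio>
#include <vector>

// Hypothetical, simplified version of the shuffle-mask test: the LHS mask
// selects the even elements and the RHS mask the odd elements, so
// binop(LHS, RHS) combines adjacent pairs, i.e. it is a horizontal op.
// (The real matcher also handles undef lanes and two distinct sources.)
static bool looksLikeHorizontalPairs(const std::vector<int> &LMask,
                                     const std::vector<int> &RMask) {
  if (LMask.size() != RMask.size())
    return false;
  for (size_t I = 0, E = LMask.size(); I != E; ++I)
    if (LMask[I] != int(2 * I) || RMask[I] != int(2 * I + 1))
      return false;
  return true;
}

int main() {
  // v4i32 hadd shape: add(shuffle<0,2,4,6>, shuffle<1,3,5,7>).
  std::printf("%d\n", looksLikeHorizontalPairs({0, 2, 4, 6}, {1, 3, 5, 7})); // 1
  std::printf("%d\n", looksLikeHorizontalPairs({0, 1, 2, 3}, {4, 5, 6, 7})); // 0
  return 0;
}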
@@ -262,13 +262,10 @@ define <8 x i32> @hadd_v8i32b(<8 x i32> %a) {
 ;
 ; AVX1-LABEL: hadd_v8i32b:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm0[0,2,2,3,4,6,6,7]
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,3,2,3,5,7,6,7]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpaddd %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
@@ -339,13 +336,10 @@ define <8 x i32> @hsub_v8i32b(<8 x i32> %a) {
 ;
 ; AVX1-LABEL: hsub_v8i32b:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm0[0,2,2,3,4,6,6,7]
-; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,3,2,3,5,7,6,7]
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vphsubd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
@@ -416,16 +410,10 @@ define <16 x i16> @hadd_v16i16b(<16 x i16> %a) {
 ;
 ; AVX1-LABEL: hadd_v16i16b:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpshufb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm2
-; AVX1-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
@@ -496,16 +484,10 @@ define <16 x i16> @hsub_v16i16b(<16 x i16> %a) {
 ;
 ; AVX1-LABEL: hsub_v16i16b:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpshufb %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2,3,6,7,10,11,14,15,14,15,10,11,12,13,14,15]
-; AVX1-NEXT:    vpshufb %xmm4, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpshufb %xmm4, %xmm3, %xmm2
-; AVX1-NEXT:    vpsubw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vphsubw %xmm0, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
 ; AVX1-NEXT:    retq
 ;
@@ -51,8 +51,7 @@ define i32 @_Z10test_shortPsS_i(i16* nocapture readonly, i16* nocapture readonly
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -188,8 +187,7 @@ define i32 @_Z10test_shortPsS_i_512(i16* nocapture readonly, i16* nocapture read
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -338,8 +336,7 @@ define i32 @test_unsigned_short(i16* nocapture readonly, i16* nocapture readonly
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -486,8 +483,7 @@ define i32 @test_unsigned_short_512(i16* nocapture readonly, i16* nocapture read
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -666,8 +662,7 @@ define i32 @_Z9test_charPcS_i(i8* nocapture readonly, i8* nocapture readonly, i3
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -894,8 +889,7 @@ define i32 @_Z9test_charPcS_i_512(i8* nocapture readonly, i8* nocapture readonly
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -56,8 +56,7 @@ define i32 @sad_16i8() nounwind {
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -318,8 +317,7 @@ define i32 @sad_32i8() nounwind {
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -860,8 +858,7 @@ define i32 @sad_avx64i8() nounwind {
 ; AVX1-NEXT:    vpaddd %xmm0, %xmm14, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    addq $24, %rsp
 ; AVX1-NEXT:    vzeroupper
@@ -257,8 +257,7 @@ define i32 @test_v8i32(<8 x i32> %a0) {
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -321,8 +320,7 @@ define i32 @test_v16i32(<16 x i32> %a0) {
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -403,8 +401,7 @@ define i32 @test_v32i32(<32 x i32> %a0) {
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
 ; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; AVX1-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddd %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -530,8 +527,7 @@ define i16 @test_v16i16(<16 x i16> %a0) {
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -610,8 +606,7 @@ define i16 @test_v32i16(<32 x i16> %a0) {
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper
@@ -708,8 +703,7 @@ define i16 @test_v64i16(<64 x i16> %a0) {
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
 ; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrld $16, %xmm0, %xmm1
-; AVX1-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vphaddw %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    vmovd %xmm0, %eax
 ; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX1-NEXT:    vzeroupper