mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-28 07:05:03 +00:00
[x86] Revert my over-eager commit in r217332.
I hadn't actually run all the tests yet and these combines have somewhat surprisingly far reaching effects.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217333 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e328c5ea83
commit
8ceea90956
@ -19320,42 +19320,26 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
|
||||
// Use the float domain if the operand type is a floating point type.
|
||||
bool FloatDomain = VT.isFloatingPoint();
|
||||
|
||||
// For floating point shuffles, we don't have free copies in the shuffle
|
||||
// instructions, so this always makes sense to canonicalize.
|
||||
// If we don't have access to VEX encodings, the generic PSHUF instructions
|
||||
// are preferable to some of the specialized forms despite requiring one more
|
||||
// byte to encode because they can implicitly copy.
|
||||
//
|
||||
// For integer shuffles, if we don't have access to VEX encodings, the generic
|
||||
// PSHUF instructions are preferable to some of the specialized forms despite
|
||||
// requiring one more byte to encode because they can implicitly copy.
|
||||
//
|
||||
// IF we *do* have VEX encodings, then we can use shorter, more specific
|
||||
// IF we *do* have VEX encodings, than we can use shorter, more specific
|
||||
// shuffle instructions freely as they can copy due to the extra register
|
||||
// operand.
|
||||
if (FloatDomain || Subtarget->hasAVX()) {
|
||||
if (Subtarget->hasAVX()) {
|
||||
// We have both floating point and integer variants of shuffles that dup
|
||||
// either the low or high half of the vector.
|
||||
if (Mask.equals(0, 0) || Mask.equals(1, 1)) {
|
||||
bool Lo = Mask.equals(0, 0);
|
||||
unsigned Shuffle;
|
||||
// If the input is a floating point, check if we have SSE3 which will let
|
||||
// us use MOVDDUP. That instruction is no slower than UNPCKLPD but has the
|
||||
// option to fold the input operand into even an unaligned memory load.
|
||||
if (FloatDomain && Lo && Subtarget->hasSSE3()) {
|
||||
Shuffle = X86ISD::MOVDDUP;
|
||||
} else {
|
||||
// We model everything else using UNPCK instructions. While MOVLHPS and
|
||||
// MOVHLPS are shorter encodings they cannot accept a memory operand
|
||||
// which overly constrains subsequent lowering.
|
||||
Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
|
||||
}
|
||||
unsigned Shuffle = FloatDomain ? (Lo ? X86ISD::MOVLHPS : X86ISD::MOVHLPS)
|
||||
: (Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH);
|
||||
if (Depth == 1 && Root->getOpcode() == Shuffle)
|
||||
return false; // Nothing to do!
|
||||
MVT ShuffleVT = FloatDomain ? MVT::v2f64 : MVT::v2i64;
|
||||
MVT ShuffleVT = FloatDomain ? MVT::v4f32 : MVT::v2i64;
|
||||
Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
|
||||
DCI.AddToWorklist(Op.getNode());
|
||||
if (Shuffle == X86ISD::MOVDDUP)
|
||||
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op);
|
||||
else
|
||||
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
|
||||
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
|
||||
DCI.AddToWorklist(Op.getNode());
|
||||
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
|
||||
/*AddTo*/ true);
|
||||
|
@ -1,5 +1,4 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE2
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+sse3 -x86-experimental-vector-shuffle-lowering | FileCheck %s --check-prefix=CHECK-SSE3
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
@ -49,7 +48,7 @@ define <2 x i64> @shuffle_v2i64_33(<2 x i64> %a, <2 x i64> %b) {
|
||||
|
||||
define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v2f64_00
|
||||
; CHECK-SSE2: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; CHECK-SSE2: shufpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
@ -63,15 +62,17 @@ define <2 x double> @shuffle_v2f64_10(<2 x double> %a, <2 x double> %b) {
|
||||
}
|
||||
define <2 x double> @shuffle_v2f64_11(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v2f64_11
|
||||
; CHECK-SSE2: unpckhpd {{.*}} # xmm0 = xmm0[1,1]
|
||||
; CHECK-SSE2: shufpd {{.*}} # xmm0 = xmm0[1,1]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 1>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
define <2 x double> @shuffle_v2f64_22(<2 x double> %a, <2 x double> %b) {
|
||||
; FIXME: Should these use movapd + shufpd to remove a domain change at the cost
|
||||
; of a mov?
|
||||
;
|
||||
; CHECK-SSE2-LABEL: @shuffle_v2f64_22
|
||||
; CHECK-SSE2: unpcklpd {{.*}} # xmm1 = xmm1[0,0]
|
||||
; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm1[0,1,0,1]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 2, i32 2>
|
||||
ret <2 x double> %shuffle
|
||||
@ -85,8 +86,7 @@ define <2 x double> @shuffle_v2f64_32(<2 x double> %a, <2 x double> %b) {
|
||||
}
|
||||
define <2 x double> @shuffle_v2f64_33(<2 x double> %a, <2 x double> %b) {
|
||||
; CHECK-SSE2-LABEL: @shuffle_v2f64_33
|
||||
; CHECK-SSE2: unpckhpd {{.*}} # xmm1 = xmm1[1,1]
|
||||
; CHECK-SSE2-NEXT: movapd %xmm1, %xmm0
|
||||
; CHECK-SSE2: pshufd {{.*}} # xmm0 = xmm1[2,3,2,3]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
%shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 3, i32 3>
|
||||
ret <2 x double> %shuffle
|
||||
@ -217,31 +217,3 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||
%shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 3, i32 1>
|
||||
ret <2 x i64> %shuffle
|
||||
}
|
||||
|
||||
|
||||
define <2 x double> @insert_dup_reg_v2f64(double %a) {
|
||||
; CHECK-SSE2-LABEL: @insert_dup_reg_v2f64
|
||||
; CHECK-SSE2: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE3-LABEL: @insert_dup_reg_v2f64
|
||||
; CHECK-SSE3: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; CHECK-SSE3-NEXT: retq
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
define <2 x double> @insert_dup_mem_v2f64(double* %ptr) {
|
||||
; CHECK-SSE2-LABEL: @insert_dup_mem_v2f64
|
||||
; CHECK-SSE2: movsd {{.*}}, %xmm0
|
||||
; CHECK-SSE2-NEXT: unpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; CHECK-SSE2-NEXT: retq
|
||||
;
|
||||
; CHECK-SSE3-LABEL: @insert_dup_mem_v2f64
|
||||
; CHECK-SSE3: movddup {{.*}}, %xmm0
|
||||
; CHECK-SSE3-NEXT: retq
|
||||
%a = load double* %ptr
|
||||
%v = insertelement <2 x double> undef, double %a, i32 0
|
||||
%shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
|
||||
ret <2 x double> %shuffle
|
||||
}
|
||||
|
@ -98,7 +98,7 @@ define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
|
||||
define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_0001
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm1 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
|
||||
@ -109,7 +109,7 @@ define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0],xmm0[0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
|
||||
@ -120,7 +120,7 @@ define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[0],xmm1[1]
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
|
||||
@ -130,7 +130,7 @@ define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_1000
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vshufpd {{.*}} # xmm1 = xmm0[1,0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
|
||||
@ -140,8 +140,8 @@ define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1-LABEL: @shuffle_v4f64_2200
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm1 = xmm1[0,0]
|
||||
; AVX1-NEXT: vunpcklpd {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm1 = xmm1[0,0]
|
||||
; AVX1-NEXT: vmovlhps {{.*}} # xmm0 = xmm0[0,0]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
|
||||
@ -152,7 +152,7 @@ define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
|
||||
; AVX1-NEXT: vshufpd {{.*}} # xmm0 = xmm1[1],xmm0[0]
|
||||
; AVX1-NEXT: vunpckhpd {{.*}} # xmm1 = xmm1[1,1]
|
||||
; AVX1-NEXT: vmovhlps {{.*}} # xmm1 = xmm1[1,1]
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
%shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
|
||||
|
Loading…
Reference in New Issue
Block a user