Remove 256-bit specific node types for UNPCKHPS/D and instead use the 128-bit versions and let the operand type disinquish. Also fix the load form of the v8i32 patterns for these to realize that the load would be promoted to v4i64.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@145126 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2011-11-24 22:57:10 +00:00
parent f475a55bd4
commit 705f2431a0
5 changed files with 60 additions and 50 deletions

View File

@ -2845,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@ -2926,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT,
case X86ISD::MOVSD:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKHWD:
case X86ISD::PUNPCKHBW:
case X86ISD::PUNPCKHDQ:
@ -4651,8 +4643,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
DecodeUNPCKHPMask(VT, ShuffleMask);
break;
case X86ISD::PUNPCKLBW:
@ -4663,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
@ -6582,16 +6570,16 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
case MVT::v4f32: return X86ISD::UNPCKLPS;
case MVT::v2f64: return X86ISD::UNPCKLPD;
case MVT::v8i32:
if (HasAVX2) return X86ISD::PUNPCKLDQ;
// else use fp unit for int unpack.
case MVT::v8f32: return X86ISD::VUNPCKLPSY;
case MVT::v8f32:
case MVT::v4f32: return X86ISD::UNPCKLPS;
case MVT::v4i64:
if (HasAVX2) return X86ISD::PUNPCKLQDQ;
// else use fp unit for int unpack.
case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKLPD;
case MVT::v32i8:
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v16i16:
@ -6606,16 +6594,16 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKHDQ;
case MVT::v2i64: return X86ISD::PUNPCKHQDQ;
case MVT::v4f32: return X86ISD::UNPCKHPS;
case MVT::v2f64: return X86ISD::UNPCKHPD;
case MVT::v8i32:
if (HasAVX2) return X86ISD::PUNPCKHDQ;
// else use fp unit for int unpack.
case MVT::v8f32: return X86ISD::VUNPCKHPSY;
case MVT::v8f32:
case MVT::v4f32: return X86ISD::UNPCKHPS;
case MVT::v4i64:
if (HasAVX2) return X86ISD::PUNPCKHQDQ;
// else use fp unit for int unpack.
case MVT::v4f64: return X86ISD::VUNPCKHPDY;
case MVT::v4f64:
case MVT::v2f64: return X86ISD::UNPCKHPD;
case MVT::v32i8:
case MVT::v16i8: return X86ISD::PUNPCKHBW;
case MVT::v16i16:
@ -11280,8 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MOVSS: return "X86ISD::MOVSS";
case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS";
case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD";
case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY";
case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY";
case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS";
case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD";
case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW";
@ -14877,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::PUNPCKHQDQ:
case X86ISD::UNPCKHPS:
case X86ISD::UNPCKHPD:
case X86ISD::VUNPCKHPSY:
case X86ISD::VUNPCKHPDY:
case X86ISD::PUNPCKLBW:
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
case X86ISD::VUNPCKLPSY:
case X86ISD::VUNPCKLPDY:
case X86ISD::MOVHLPS:
case X86ISD::MOVLHPS:
case X86ISD::PSHUFD:

View File

@ -275,12 +275,8 @@ namespace llvm {
MOVSS,
UNPCKLPS,
UNPCKLPD,
VUNPCKLPSY,
VUNPCKLPDY,
UNPCKHPS,
UNPCKHPD,
VUNPCKHPSY,
VUNPCKHPDY,
PUNPCKLBW,
PUNPCKLWD,
PUNPCKLDQ,

View File

@ -132,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>;
def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>;
def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>;
def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>;

View File

@ -2472,21 +2472,21 @@ let Predicates = [HasAVX] in {
def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
(VUNPCKHPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))),
def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))),
def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))),
def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)),
def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
@ -2498,21 +2498,21 @@ let Predicates = [HasAVX] in {
def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)),
(VUNPCKHPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))),
def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))),
def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))),
def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)),
def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
// FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the

View File

@ -67,6 +67,15 @@ entry:
ret <8 x i32> %shuffle.i
}
; CHECK: vunpckhps (%
define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
entry:
%a = load <8 x i32>* %src1
%b = load <8 x i32>* %src2
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
ret <8 x i32> %shuffle.i
}
; CHECK: vunpckhpd
define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
entry:
@ -74,6 +83,15 @@ entry:
ret <4 x i64> %shuffle.i
}
; CHECK: vunpckhpd (%
define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
entry:
%a = load <4 x i64>* %src1
%b = load <4 x i64>* %src2
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x i64> %shuffle.i
}
; CHECK: vunpcklps
define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp {
entry:
@ -81,9 +99,27 @@ entry:
ret <8 x i32> %shuffle.i
}
; CHECK: vunpcklps (%
define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp {
entry:
%a = load <8 x i32>* %src1
%b = load <8 x i32>* %src2
%shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
ret <8 x i32> %shuffle.i
}
; CHECK: vunpcklpd
define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp {
entry:
%shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %shuffle.i
}
; CHECK: vunpcklpd (%
define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp {
entry:
%a = load <4 x i64>* %src1
%b = load <4 x i64>* %src2
%shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %shuffle.i
}