diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 215e7357205..96c6f410719 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2845,16 +2845,12 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -2926,16 +2922,12 @@ static SDValue getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKHWD: case X86ISD::PUNPCKHBW: case X86ISD::PUNPCKHDQ: @@ -4651,8 +4643,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: DecodeUNPCKHPMask(VT, ShuffleMask); break; case X86ISD::PUNPCKLBW: @@ -4663,8 +4653,6 @@ static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: @@ -6582,16 +6570,16 @@ static inline unsigned getUNPCKLOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v2f64: return X86ISD::UNPCKLPD; case 
MVT::v8i32: if (HasAVX2) return X86ISD::PUNPCKLDQ; // else use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v8f32: + case MVT::v4f32: return X86ISD::UNPCKLPS; case MVT::v4i64: if (HasAVX2) return X86ISD::PUNPCKLQDQ; // else use fp unit for int unpack. - case MVT::v4f64: return X86ISD::VUNPCKLPDY; + case MVT::v4f64: + case MVT::v2f64: return X86ISD::UNPCKLPD; case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v16i16: @@ -6606,16 +6594,16 @@ static inline unsigned getUNPCKHOpcode(EVT VT, bool HasAVX2) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKHDQ; case MVT::v2i64: return X86ISD::PUNPCKHQDQ; - case MVT::v4f32: return X86ISD::UNPCKHPS; - case MVT::v2f64: return X86ISD::UNPCKHPD; case MVT::v8i32: if (HasAVX2) return X86ISD::PUNPCKHDQ; // else use fp unit for int unpack. - case MVT::v8f32: return X86ISD::VUNPCKHPSY; + case MVT::v8f32: + case MVT::v4f32: return X86ISD::UNPCKHPS; case MVT::v4i64: if (HasAVX2) return X86ISD::PUNPCKHQDQ; // else use fp unit for int unpack. 
- case MVT::v4f64: return X86ISD::VUNPCKHPDY; + case MVT::v4f64: + case MVT::v2f64: return X86ISD::UNPCKHPD; case MVT::v32i8: case MVT::v16i8: return X86ISD::PUNPCKHBW; case MVT::v16i16: @@ -11280,8 +11268,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; - case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; - case X86ISD::VUNPCKLPDY: return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -14877,16 +14863,12 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKHQDQ: case X86ISD::UNPCKHPS: case X86ISD::UNPCKHPD: - case X86ISD::VUNPCKHPSY: - case X86ISD::VUNPCKHPDY: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - case X86ISD::VUNPCKLPSY: - case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 616a192b5a6..ccff3a5ea69 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -275,12 +275,8 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, - VUNPCKLPSY, - VUNPCKLPDY, UNPCKHPS, UNPCKHPD, - VUNPCKHPSY, - VUNPCKHPDY, PUNPCKLBW, PUNPCKLWD, PUNPCKLDQ, diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 34f61740062..791bbe6566c 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -132,13 +132,9 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; -def X86Unpcklpsy : 
SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; -def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; -def X86Unpckhpsy : SDNode<"X86ISD::VUNPCKHPSY", SDTShuff2Op>; -def X86Unpckhpdy : SDNode<"X86ISD::VUNPCKHPDY", SDTShuff2Op>; def X86Punpcklbw : SDNode<"X86ISD::PUNPCKLBW", SDTShuff2Op>; def X86Punpcklwd : SDNode<"X86ISD::PUNPCKLWD", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 38aaccbf09e..7cadac16d7d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2472,21 +2472,21 @@ let Predicates = [HasAVX] in { def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), (VUNPCKHPSrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, VR256:$src2)), (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpcklpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpcklps VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))), (VUNPCKLPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, (memopv8f32 addr:$src2))), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, (memopv8f32 addr:$src2))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8f32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8f32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, (memopv8i32 addr:$src2))), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, (bc_v8i32 
(memopv4i64 addr:$src2)))), (VUNPCKHPSYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (X86Unpckhpsy VR256:$src1, VR256:$src2)), + def : Pat<(v8i32 (X86Unpckhps VR256:$src1, VR256:$src2)), (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), @@ -2498,21 +2498,21 @@ let Predicates = [HasAVX] in { def : Pat<(v2f64 (X86Unpckhpd VR128:$src1, VR128:$src2)), (VUNPCKHPDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKLPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpcklpd VR256:$src1, VR256:$src2)), (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, (memopv4f64 addr:$src2))), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, (memopv4f64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4f64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4f64 (X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, (memopv4i64 addr:$src2))), + def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, (memopv4i64 addr:$src2))), (VUNPCKHPDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (X86Unpckhpdy VR256:$src1, VR256:$src2)), + def : Pat<(v4i64 (X86Unpckhpd VR256:$src1, VR256:$src2)), (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>; // FIXME: Instead of X86Movddup, there should be a X86Unpcklpd here, the diff --git a/test/CodeGen/X86/avx-unpack.ll 
b/test/CodeGen/X86/avx-unpack.ll index d420101339f..fcd7bb6883a 100644 --- a/test/CodeGen/X86/avx-unpack.ll +++ b/test/CodeGen/X86/avx-unpack.ll @@ -67,6 +67,15 @@ entry: ret <8 x i32> %shuffle.i } +; CHECK: vunpckhps (% +define <8 x i32> @unpackhips2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <8 x i32>* %src1 + %b = load <8 x i32>* %src2 + %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> + ret <8 x i32> %shuffle.i +} + ; CHECK: vunpckhpd define <4 x i64> @unpackhipd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { entry: @@ -74,6 +83,15 @@ entry: ret <4 x i64> %shuffle.i } +; CHECK: vunpckhpd (% +define <4 x i64> @unpackhipd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <4 x i64>* %src1 + %b = load <4 x i64>* %src2 + %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> + ret <4 x i64> %shuffle.i +} + ; CHECK: vunpcklps define <8 x i32> @unpacklops1(<8 x i32> %src1, <8 x i32> %src2) nounwind uwtable readnone ssp { entry: @@ -81,9 +99,27 @@ entry: ret <8 x i32> %shuffle.i } +; CHECK: vunpcklps (% +define <8 x i32> @unpacklops2(<8 x i32>* %src1, <8 x i32>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <8 x i32>* %src1 + %b = load <8 x i32>* %src2 + %shuffle.i = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> + ret <8 x i32> %shuffle.i +} + ; CHECK: vunpcklpd define <4 x i64> @unpacklopd1(<4 x i64> %src1, <4 x i64> %src2) nounwind uwtable readnone ssp { entry: %shuffle.i = shufflevector <4 x i64> %src1, <4 x i64> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6> ret <4 x i64> %shuffle.i } + +; CHECK: vunpcklpd (% +define <4 x i64> @unpacklopd2(<4 x i64>* %src1, <4 x i64>* %src2) nounwind uwtable readnone ssp { +entry: + %a = load <4 x i64>* %src1 + %b = load <4 x i64>* %src2 + %shuffle.i = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> + ret <4 x i64> %shuffle.i +}