From c0401dddf72b6ce4dff9411922eadfd5458a60e0 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Wed, 13 Jul 2011 21:36:51 +0000 Subject: [PATCH] Make X86ISD::ANDNP more general and Codegen 256-bit VANDNP. A more general version of X86ISD::ANDNP also opened the room for a little bit of refactoring. llvm-svn: 135088 --- lib/Target/X86/X86ISelLowering.cpp | 8 ++- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 +- lib/Target/X86/X86InstrSSE.td | 91 +++++++++---------------- test/CodeGen/X86/avx-256-logic.ll | 45 ++++++++++++ 4 files changed, 82 insertions(+), 64 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f953bf2bc7f..e06d84c54a0 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11821,10 +11821,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (R.getNode()) return R; - // Want to form ANDNP nodes, in the hopes of then easily combining them with - // OR and AND nodes to form PBLEND/PSIGN. + // Want to form ANDNP nodes: + // 1) In the hopes of then easily combining them with OR and AND nodes + // to form PBLEND/PSIGN. 
+ // 2) To match ANDN packed intrinsics EVT VT = N->getValueType(0); - if (VT != MVT::v2i64) + if (VT != MVT::v2i64 && VT != MVT::v4i64) return SDValue(); SDValue N0 = N->getOperand(0); diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 67a5a345be8..e35a6751929 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -47,7 +47,7 @@ def X86pshufb : SDNode<"X86ISD::PSHUFB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86andnp : SDNode<"X86ISD::ANDNP", - SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>, + SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>>; def X86psignb : SDNode<"X86ISD::PSIGNB", SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 72b383000af..e6167c73d30 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1473,98 +1473,68 @@ let neverHasSideEffects = 1, Pattern = [], isCommutable = 0 in /// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops /// multiclass sse12_fp_packed_logical opc, string OpcodeStr, - SDNode OpNode, int HasPat = 0, - list> Pattern = []> { + SDNode OpNode> { let Pattern = [] in { defm V#NAME#PS : sse12_fp_packed_logical_rm, - VEX_4V; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm, - OpSize, VEX_4V; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed_logical_rm, TB; + [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], + [(set VR128:$dst, (OpNode 
(bc_v2i64 (v4f32 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB; defm PD : sse12_fp_packed_logical_rm, - TB, OpSize; + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (bc_v2i64 (v2f64 VR128:$src2))))], + [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), + (memopv2i64 addr:$src2)))]>, TB, OpSize; } } /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// multiclass sse12_fp_packed_logical_y opc, string OpcodeStr, - SDNode OpNode, int HasNoPat = 0> { + SDNode OpNode> { defm PSY : sse12_fp_packed_logical_rm, // rr - [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, - VR256:$src2)))]), - !if(HasNoPat, [], // rm - [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), - (memopv4i64 addr:$src2)))]), 0>, VEX_4V; + [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, VEX_4V; defm PDY : sse12_fp_packed_logical_rm, // rr - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (bc_v4i64 (v4f64 VR256:$src2))))]), - !if(HasNoPat, [], // rm - [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), - (memopv4i64 addr:$src2)))]), 0>, - OpSize, VEX_4V; + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (bc_v4i64 (v4f64 VR256:$src2))))], + [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), + (memopv4i64 addr:$src2)))], 0>, + OpSize, VEX_4V; } // AVX 256-bit packed logical ops forms -defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; -defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; -defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; -let isCommutable = 0 in { - defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", undef /* dummy */, 1>; -} +defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>; +defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>; +defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>; +defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>; defm AND : 
sse12_fp_packed_logical<0x54, "and", and>; defm OR : sse12_fp_packed_logical<0x56, "or", or>; defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; let isCommutable = 0 in - defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [ - // single r+r - [(set VR128:$dst, (X86andnp VR128:$src1, VR128:$src2))], - // double r+r - [], - // single r+m - [(set VR128:$dst, (X86andnp VR128:$src1, (memopv2i64 addr:$src2)))], - // double r+m - []]>; + defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Arithmetic Instructions @@ -3678,6 +3648,7 @@ let Predicates = [HasAVX] in { def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>; def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>; def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>; + def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>; } // Move scalar to XMM zero-extended diff --git a/test/CodeGen/X86/avx-256-logic.ll b/test/CodeGen/X86/avx-256-logic.ll index 05e82895fec..d9e5d081fb1 100644 --- a/test/CodeGen/X86/avx-256-logic.ll +++ b/test/CodeGen/X86/avx-256-logic.ll @@ -114,3 +114,48 @@ entry: ret <8 x float> %1 } +; CHECK: vandnpd +define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <4 x double> %x to <4 x i64> + %neg.i = xor <4 x i64> %0, + %1 = bitcast <4 x double> %y to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnpd (% +define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <4 x double>* %x, align 32 + %0 = bitcast <4 x double> %y to <4 x i64> + %neg.i = xor <4 x i64> %0, + %1 = bitcast <4 x double> %tmp2 to <4 x i64> + %and.i = and <4 x i64> %1, %neg.i + %2 = bitcast <4 x i64> %and.i 
to <4 x double> + ret <4 x double> %2 +} + +; CHECK: vandnps +define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp { +entry: + %0 = bitcast <8 x float> %x to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %y to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +} + +; CHECK: vandnps (% +define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp { +entry: + %tmp2 = load <8 x float>* %x, align 32 + %0 = bitcast <8 x float> %y to <8 x i32> + %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %tmp2 to <8 x i32> + %and.i = and <8 x i32> %1, %neg.i + %2 = bitcast <8 x i32> %and.i to <8 x float> + ret <8 x float> %2 +}