diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9e9192bc8a0..0091df753eb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -625,6 +625,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
   case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
   case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+  case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
   case ARMISD::VDUP: return "ARMISD::VDUP";
   case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
   case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -2925,6 +2926,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
 
   switch (SplatBitSize) {
   case 8:
+    if (!isVMOV)
+      return SDValue();
     // Any 1-byte value is OK. Op=0, Cmode=1110.
     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
     OpCmode = 0xe;
@@ -3006,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
     return SDValue();
 
   case 64: {
-    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
     if (!isVMOV)
       return SDValue();
+    // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
     uint64_t BitMask = 0xff;
     uint64_t Val = 0;
     unsigned ImmMask = 1;
@@ -3248,6 +3251,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
         return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
       }
+
+      // Try an immediate VMVN; the mask avoids an undefined 64-bit shift.
+      uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+                             (~0ULL >> (64 - SplatBitSize)));
+      Val = isNEONModifiedImm(NegatedImm,
+                              SplatUndef.getZExtValue(), SplatBitSize,
+                              DAG, VmovVT, VT.is128BitVector(), false);
+      if (Val.getNode()) {
+        SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+      }
     }
   }
 
@@ -4232,14 +4246,15 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
 /// ARMISD::VDUPLANE.
 static SDValue PerformVDUPLANECombine(SDNode *N,
                                       TargetLowering::DAGCombinerInfo &DCI) {
-  // If the source is already a VMOVIMM splat, the VDUPLANE is redundant.
+  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+  // redundant.
   SDValue Op = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
   // Ignore bit_converts.
   while (Op.getOpcode() == ISD::BIT_CONVERT)
     Op = Op.getOperand(0);
-  if (Op.getOpcode() != ARMISD::VMOVIMM)
+  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
     return SDValue();
 
   // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 81120c8d994..128b72e1e74 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -124,8 +124,11 @@ namespace llvm {
       VGETLANEu, // zero-extend vector extract element
       VGETLANEs, // sign-extend vector extract element
 
-      // Vector duplicate:
+      // Vector move immediate and move negated immediate:
       VMOVIMM,
+      VMVNIMM,
+
+      // Vector duplicate:
       VDUP,
       VDUPLANE,
 
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index bde806d5248..7f7eb980abe 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -65,9 +65,10 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
 def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 
-def NEONvmovImm : SDNode<"ARMISD::VMOVIMM",
-                         SDTypeProfile<1, 1, [SDTCisVec<0>,
-                                              SDTCisVT<1, i32>]>>;
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
 def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
 
 // VDUPLANE can produce a quad-register result from a double-register source,
@@ -2383,6 +2384,28 @@ def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
                   [(set QPR:$dst, (v4i32 (or QPR:$src1,
                                              (vnotq QPR:$src2))))]>;
 
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i16", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+                         (ins nModImm:$SIMM), IIC_VMOVImm,
+                         "vmvn", "i32", "$dst, $SIMM", "",
+                         [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+}
+
 // VMVN : Vector Bitwise NOT
 def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
                  (outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
diff --git a/test/CodeGen/ARM/vmov.ll b/test/CodeGen/ARM/vmov.ll
index f80301863d7..5e872ab6d0b 100644
--- a/test/CodeGen/ARM/vmov.ll
+++ b/test/CodeGen/ARM/vmov.ll
@@ -18,6 +18,18 @@ define <4 x i16> @v_movi16b() nounwind {
 	ret <4 x i16> < i16 4096, i16 4096, i16 4096, i16 4096 >
 }
 
+define <4 x i16> @v_mvni16a() nounwind {
+;CHECK: v_mvni16a:
+;CHECK: vmvn.i16 d0, #0x10
+	ret <4 x i16> < i16 65519, i16 65519, i16 65519, i16 65519 > ; 0xFFEF == ~0x0010
+}
+
+define <4 x i16> @v_mvni16b() nounwind {
+;CHECK: v_mvni16b:
+;CHECK: vmvn.i16 d0, #0x1000
+	ret <4 x i16> < i16 61439, i16 61439, i16 61439, i16 61439 > ; 0xEFFF == ~0x1000
+}
+
 define <2 x i32> @v_movi32a() nounwind {
 ;CHECK: v_movi32a:
 ;CHECK: vmov.i32 d0, #0x20
@@ -54,6 +66,42 @@ define <2 x i32> @v_movi32f() nounwind {
 	ret <2 x i32> < i32 2162687, i32 2162687 >
 }
 
+define <2 x i32> @v_mvni32a() nounwind {
+;CHECK: v_mvni32a:
+;CHECK: vmvn.i32 d0, #0x20
+	ret <2 x i32> < i32 4294967263, i32 4294967263 > ; 0xFFFFFFDF == ~0x00000020
+}
+
+define <2 x i32> @v_mvni32b() nounwind {
+;CHECK: v_mvni32b:
+;CHECK: vmvn.i32 d0, #0x2000
+	ret <2 x i32> < i32 4294959103, i32 4294959103 > ; 0xFFFFDFFF == ~0x00002000
+}
+
+define <2 x i32> @v_mvni32c() nounwind {
+;CHECK: v_mvni32c:
+;CHECK: vmvn.i32 d0, #0x200000
+	ret <2 x i32> < i32 4292870143, i32 4292870143 > ; 0xFFDFFFFF == ~0x00200000
+}
+
+define <2 x i32> @v_mvni32d() nounwind {
+;CHECK: v_mvni32d:
+;CHECK: vmvn.i32 d0, #0x20000000
+	ret <2 x i32> < i32 3758096383, i32 3758096383 > ; 0xDFFFFFFF == ~0x20000000
+}
+
+define <2 x i32> @v_mvni32e() nounwind {
+;CHECK: v_mvni32e:
+;CHECK: vmvn.i32 d0, #0x20FF
+	ret <2 x i32> < i32 4294958848, i32 4294958848 > ; 0xFFFFDF00 == ~0x000020FF
+}
+
+define <2 x i32> @v_mvni32f() nounwind {
+;CHECK: v_mvni32f:
+;CHECK: vmvn.i32 d0, #0x20FFFF
+	ret <2 x i32> < i32 4292804608, i32 4292804608 > ; 0xFFDF0000 == ~0x0020FFFF
+}
+
 define <1 x i64> @v_movi64() nounwind {
 ;CHECK: v_movi64:
 ;CHECK: vmov.i64 d0, #0xFF0000FF0000FFFF