diff --git a/include/llvm/IR/IntrinsicsARM.td b/include/llvm/IR/IntrinsicsARM.td index a2f062024ba..8d8c4af850f 100644 --- a/include/llvm/IR/IntrinsicsARM.td +++ b/include/llvm/IR/IntrinsicsARM.td @@ -466,4 +466,21 @@ def int_arm_neon_vbsl : Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + +// Crypto instructions +def int_arm_neon_aesd : Neon_2Arg_Intrinsic; +def int_arm_neon_aese : Neon_2Arg_Intrinsic; +def int_arm_neon_aesimc : Neon_1Arg_Intrinsic; +def int_arm_neon_aesmc : Neon_1Arg_Intrinsic; +def int_arm_neon_sha1h : Neon_1Arg_Intrinsic; +def int_arm_neon_sha1su1 : Neon_2Arg_Intrinsic; +def int_arm_neon_sha256su0 : Neon_2Arg_Intrinsic; +def int_arm_neon_sha1c : Neon_3Arg_Intrinsic; +def int_arm_neon_sha1m : Neon_3Arg_Intrinsic; +def int_arm_neon_sha1p : Neon_3Arg_Intrinsic; +def int_arm_neon_sha1su0: Neon_3Arg_Intrinsic; +def int_arm_neon_sha256h: Neon_3Arg_Intrinsic; +def int_arm_neon_sha256h2: Neon_3Arg_Intrinsic; +def int_arm_neon_sha256su1: Neon_3Arg_Intrinsic; + } // end TargetPrefix diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 5752005eacf..c9d71bd1ec8 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -67,6 +67,9 @@ def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", "Enable support for Performance Monitor extensions">; def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone security extensions">; +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for Cryptography extensions", + [FeatureNEON]>; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index e505e1a9ae7..27cfe960483 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -2015,7 +2015,7 @@ 
class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, } // Same as N2V but not predicated. -class N2Vnp op17_16, bits<3> op10_8, bit op7, bit op6, +class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, dag oops, dag iops, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, list pattern> : NeonInp op17_16, bits<3> op10_8, bit op7, bit op6, // Encode constant bits let Inst{27-23} = 0b00111; let Inst{21-20} = 0b11; - let Inst{19-18} = 0b10; + let Inst{19-18} = op19_18; let Inst{17-16} = op17_16; let Inst{11} = 0; let Inst{10-8} = op10_8; diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index e432aca48bd..93ecb1640d1 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -212,6 +212,8 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f1bd37ea526..269c13dedb7 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -2355,17 +2355,36 @@ class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, class N2VDIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; class N2VQIntnp op17_16, bits<3> op10_8, bit op7, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> - : N2Vnp; +// Similar to N2VQIntnp with some more encoding bits exposed (crypto). 
+class N2VQIntXnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations. class N2VN op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2534,7 +2553,7 @@ class N3VDIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp, bit Commutable> : N3Vnp; @@ -2592,6 +2611,19 @@ class N3VQIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, ResTy, OpTy, IntOp, Commutable, [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; +// Same as N3VQIntnp but with Vd as a src register. +class N3VQInt3np op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -2842,6 +2874,7 @@ class N3VL op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2897,6 +2930,17 @@ class N3VLInt op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + +// Same as above, but not predicated. 
+class N3VLIntnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp; + class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -4078,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -5818,6 +5868,49 @@ defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES + : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", 
op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op op27_23, bits<2> op21_20, SDPatternOperator Int> + : N3VQInt3np, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>; +def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 
a227718e306..0692fbd1c13 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -110,6 +110,7 @@ void ARMSubtarget::initializeEnvironment() { FPOnlySP = false; HasPerfMon = false; HasTrustZone = false; + HasCrypto = false; AllowsUnalignedMem = false; Thumb2DSP = false; UseNaClTrap = false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 65278a5846f..a3b701a15a4 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -159,6 +159,9 @@ protected: /// HasTrustZone - if true, processor supports TrustZone security extensions bool HasTrustZone; + /// HasCrypto - if true, processor supports Cryptography extensions + bool HasCrypto; + /// AllowsUnalignedMem - If true, the subtarget allows unaligned memory /// accesses for some types. For details, see /// ARMTargetLowering::allowsUnalignedMemoryAccesses(). @@ -248,6 +251,7 @@ public: bool hasVFP4() const { return HasVFPv4; } bool hasFPARMv8() const { return HasFPARMv8; } bool hasNEON() const { return HasNEON; } + bool hasCrypto() const { return HasCrypto; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ed9c8c4e00b..edb7ccdc293 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -141,7 +141,8 @@ class ARMAsmParser : public MCTargetAsmParser { StringRef splitMnemonic(StringRef Mnemonic, unsigned &PredicationCode, bool &CarrySetting, unsigned &ProcessorIMod, StringRef &ITMask); - void getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, + void getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode); bool isThumb() const { @@ -4784,8 +4785,8 @@ StringRef ARMAsmParser::splitMnemonic(StringRef Mnemonic, // // FIXME: It would be nice to autogen this. 
void ARMAsmParser:: -getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, - bool &CanAcceptPredicationCode) { +getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, + bool &CanAcceptCarrySet, bool &CanAcceptPredicationCode) { if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" || Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" || Mnemonic == "add" || Mnemonic == "adc" || @@ -4808,7 +4809,9 @@ getMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet, Mnemonic == "vmaxnm" || Mnemonic == "vminnm" || Mnemonic == "vcvta" || Mnemonic == "vcvtn" || Mnemonic == "vcvtp" || Mnemonic == "vcvtm" || Mnemonic == "vrinta" || Mnemonic == "vrintn" || Mnemonic == "vrintp" || - Mnemonic == "vrintm") { + Mnemonic == "vrintm" || Mnemonic.startswith("aes") || + Mnemonic.startswith("sha1") || Mnemonic.startswith("sha256") || + (FullInst.startswith("vmull") && FullInst.endswith(".p64"))) { // These mnemonics are never predicable CanAcceptPredicationCode = false; } else if (!isThumb()) { @@ -5068,7 +5071,7 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // the matcher deal with finding the right instruction or generating an // appropriate error. bool CanAcceptCarrySet, CanAcceptPredicationCode; - getMnemonicAcceptInfo(Mnemonic, CanAcceptCarrySet, CanAcceptPredicationCode); + getMnemonicAcceptInfo(Mnemonic, Name, CanAcceptCarrySet, CanAcceptPredicationCode); // If we had a carry-set on an instruction that can't do that, issue an // error. 
diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8a066643f2a..7f53240f21a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -506,6 +506,14 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, return result; } + MI.clear(); + result = decodeInstruction(DecoderTablev8Crypto32, MI, insn, Address, + this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + MI.clear(); Size = 0; return MCDisassembler::Fail; @@ -825,6 +833,18 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } } + MI.clear(); + uint32_t NEONCryptoInsn = insn32; + NEONCryptoInsn &= 0xF0FFFFFF; // Clear bits 27-24 + NEONCryptoInsn |= (NEONCryptoInsn & 0x10000000) >> 4; // Move bit 28 to bit 24 + NEONCryptoInsn |= 0x12000000; // Set bits 28 and 25 + result = decodeInstruction(DecoderTablev8Crypto32, MI, NEONCryptoInsn, + Address, this, STI); + if (result != MCDisassembler::Fail) { + Size = 4; + return result; + } + MI.clear(); uint32_t NEONv8Insn = insn32; NEONv8Insn &= 0xF3FFFFFF; // Clear bits 27-26 diff --git a/test/CodeGen/ARM/intrinsics-crypto.ll b/test/CodeGen/ARM/intrinsics-crypto.ll new file mode 100644 index 00000000000..c038fe6da84 --- /dev/null +++ b/test/CodeGen/ARM/intrinsics-crypto.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=armv8 -mattr=+crypto | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_aesde(<16 x i8>* %a, <16 x i8> *%b) { + %tmp = load <16 x i8>* %a + %tmp2 = load <16 x i8>* %b + %tmp3 = call <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8> %tmp, <16 x i8> %tmp2) + ; CHECK: aesd.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp4 = call <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8> %tmp3, <16 x i8> %tmp2) + ; CHECK: aese.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp5 = call <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8> %tmp4) + ; CHECK: aesimc.8 q{{[0-9]+}}, q{{[0-9]+}} + %tmp6 = call <16 x 
i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8> %tmp5) + ; CHECK: aesmc.8 q{{[0-9]+}}, q{{[0-9]+}} + ret <16 x i8> %tmp6 +} + +define arm_aapcs_vfpcc <4 x i32> @test_sha(<4 x i32> *%a, <4 x i32> *%b, <4 x i32> *%c) { + %tmp = load <4 x i32>* %a + %tmp2 = load <4 x i32>* %b + %tmp3 = load <4 x i32>* %c + %res1 = call <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32> %tmp) + ; CHECK: sha1h.32 q{{[0-9]+}}, q{{[0-9]+}} + %res2 = call <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32> %tmp2, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1c.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res3 = call <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32> %res2, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1m.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res4 = call <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32> %res3, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1p.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res5 = call <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32> %res4, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha1su0.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res6 = call <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32> %res5, <4 x i32> %res1) + ; CHECK: sha1su1.32 q{{[0-9]+}}, q{{[0-9]+}} + %res7 = call <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32> %res6, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256h.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res8 = call <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32> %res7, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256h2.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res9 = call <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32> %res8, <4 x i32> %tmp3, <4 x i32> %res1) + ; CHECK: sha256su1.32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}} + %res10 = call <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32> %res9, <4 x i32> %tmp3) + ; CHECK: sha256su0.32 q{{[0-9]+}}, q{{[0-9]+}} + ret <4 x i32> %res10 +} + +declare <16 x i8> @llvm.arm.neon.aesd.v16i8(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.arm.neon.aese.v16i8(<16 x i8>, <16 x i8>) 
+declare <16 x i8> @llvm.arm.neon.aesimc.v16i8(<16 x i8>) +declare <16 x i8> @llvm.arm.neon.aesmc.v16i8(<16 x i8>) +declare <4 x i32> @llvm.arm.neon.sha1h.v4i32(<4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1c.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1m.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1p.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1su0.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256h2.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su1.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha256su0.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.arm.neon.sha1su1.v4i32(<4 x i32>, <4 x i32>) diff --git a/test/MC/ARM/invalid-neon-v8.s b/test/MC/ARM/invalid-neon-v8.s index 06406f3f9ca..7aaf2778a8a 100644 --- a/test/MC/ARM/invalid-neon-v8.s +++ b/test/MC/ARM/invalid-neon-v8.s @@ -1,4 +1,4 @@ -@ RUN: not llvm-mc -triple armv8 -mattr=+neon -show-encoding < %s 2>&1 | FileCheck %s +@ RUN: not llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s vmaxnm.f32 s4, d5, q1 @ CHECK: error: invalid operand for instruction @@ -24,3 +24,47 @@ vrintz.f32 d3, q12 @ CHECK: error: invalid operand for instruction vrintmge.f32.f32 d3, d4 @ CHECK: error: instruction 'vrintm' is not predicable, but condition code specified + +aesd.8 q0, s1 +@ CHECK: error: invalid operand for instruction +aese.8 s0, q1 +@ CHECK: error: invalid operand for instruction +aesimc.8 s0, q1 +@ CHECK: error: invalid operand for instruction +aesmc.8 q0, d1 +@ CHECK: error: invalid operand for instruction +aesdge.8 q0, q1 +@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified + +sha1h.32 d0, q1 +@ CHECK: error: invalid operand for instruction +sha1su1.32 q0, s1 +@ CHECK: error: invalid 
operand for instruction +sha256su0.32 s0, q1 +@ CHECK: error: invalid operand for instruction +sha1heq.32 q0, q1 +@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified + +sha1c.32 s0, d1, q2 +@ CHECK: error: invalid operand for instruction +sha1m.32 q0, s1, q2 +@ CHECK: error: invalid operand for instruction +sha1p.32 s0, q1, q2 +@ CHECK: error: invalid operand for instruction +sha1su0.32 d0, q1, q2 +@ CHECK: error: invalid operand for instruction +sha256h.32 q0, s1, q2 +@ CHECK: error: invalid operand for instruction +sha256h2.32 q0, q1, s2 +@ CHECK: error: invalid operand for instruction +sha256su1.32 s0, d1, q2 +@ CHECK: error: invalid operand for instruction +sha256su1lt.32 q0, d1, q2 +@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified + +vmull.p64 q0, s1, s3 +@ CHECK: error: invalid operand for instruction +vmull.p64 s1, d2, d3 +@ CHECK: error: invalid operand for instruction +vmullge.p64 q0, d16, d17 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified diff --git a/test/MC/ARM/neon-crypto.s b/test/MC/ARM/neon-crypto.s new file mode 100644 index 00000000000..92d24da6c65 --- /dev/null +++ b/test/MC/ARM/neon-crypto.s @@ -0,0 +1,51 @@ +@ RUN: llvm-mc -triple armv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s +@ RUN: not llvm-mc -triple=armv7 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix=CHECK-V7 + +aesd.8 q0, q1 +aese.8 q0, q1 +aesimc.8 q0, q1 +aesmc.8 q0, q1 +@ CHECK: aesd.8 q0, q1 @ encoding: [0x42,0x03,0xb0,0xf3] +@ CHECK: aese.8 q0, q1 @ encoding: [0x02,0x03,0xb0,0xf3] +@ CHECK: aesimc.8 q0, q1 @ encoding: [0xc2,0x03,0xb0,0xf3] +@ CHECK: aesmc.8 q0, q1 @ encoding: [0x82,0x03,0xb0,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +sha1h.32 q0, q1 +sha1su1.32 q0, q1 +sha256su0.32 q0, q1 +@ CHECK: 
sha1h.32 q0, q1 @ encoding: [0xc2,0x02,0xb9,0xf3] +@ CHECK: sha1su1.32 q0, q1 @ encoding: [0x82,0x03,0xba,0xf3] +@ CHECK: sha256su0.32 q0, q1 @ encoding: [0xc2,0x03,0xba,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +sha1c.32 q0, q1, q2 +sha1m.32 q0, q1, q2 +sha1p.32 q0, q1, q2 +sha1su0.32 q0, q1, q2 +sha256h.32 q0, q1, q2 +sha256h2.32 q0, q1, q2 +sha256su1.32 q0, q1, q2 +@ CHECK: sha1c.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xf2] +@ CHECK: sha1m.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x22,0xf2] +@ CHECK: sha1p.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x12,0xf2] +@ CHECK: sha1su0.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x32,0xf2] +@ CHECK: sha256h.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x02,0xf3] +@ CHECK: sha256h2.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x12,0xf3] +@ CHECK: sha256su1.32 q0, q1, q2 @ encoding: [0x44,0x0c,0x22,0xf3] +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 +@ CHECK-V7: instruction requires: crypto armv8 + +vmull.p64 q8, d16, d17 +@ CHECK: vmull.p64 q8, d16, d17 @ encoding: [0xa1,0x0e,0xe0,0xf2] +@ CHECK-V7: instruction requires: crypto armv8 diff --git a/test/MC/ARM/thumb-invalid-crypto.txt b/test/MC/ARM/thumb-invalid-crypto.txt new file mode 100644 index 00000000000..a5f9a19690c --- /dev/null +++ b/test/MC/ARM/thumb-invalid-crypto.txt @@ -0,0 +1,42 @@ +@ RUN: not llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s 2>&1 | FileCheck %s + +iteee lo +aesdlo.8 q0, q1 +@ CHECK: error: instruction 'aesd' is not predicable, but condition code specified +aesimchs.8 q0, q1 +@ CHECK: error: instruction 'aesimc' is not predicable, but condition code specified +aesmchs.8 q0, q1 +@ CHECK: error: instruction 
'aesmc' is not predicable, but condition code specified +aesehs.8 q0, q1 +@ CHECK: error: instruction 'aese' is not predicable, but condition code specified + +itee hs +sha1hhs.32 q0, q1 +@ CHECK: error: instruction 'sha1h' is not predicable, but condition code specified +sha1su1lo.32 q0, q1 +@ CHECK: error: instruction 'sha1su1' is not predicable, but condition code specified +sha256su0lo.32 q0, q1 +@ CHECK: error: instruction 'sha256su0' is not predicable, but condition code specified + +iteee lo +sha1clo.32 s0, d1, q2 +@ CHECK: error: instruction 'sha1c' is not predicable, but condition code specified +sha1mhs.32 q0, s1, q2 +@ CHECK: error: instruction 'sha1m' is not predicable, but condition code specified +sha1phs.32 s0, q1, q2 +@ CHECK: error: instruction 'sha1p' is not predicable, but condition code specified +sha1su0hs.32 d0, q1, q2 +@ CHECK: error: instruction 'sha1su0' is not predicable, but condition code specified +itee hs +sha256hhs.32 q0, s1, q2 +@ CHECK: error: instruction 'sha256h' is not predicable, but condition code specified +sha256h2lo.32 q0, q1, s2 +@ CHECK: error: instruction 'sha256h2' is not predicable, but condition code specified +sha256su1lo.32 s0, d1, q2 +@ CHECK: error: instruction 'sha256su1' is not predicable, but condition code specified + +ite lo +vmulllo.p64 q0, s1, s3 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified +vmullhs.p64 q0, d16, d17 +@ CHECK: error: instruction 'vmull' is not predicable, but condition code specified diff --git a/test/MC/ARM/thumb-neon-crypto.s b/test/MC/ARM/thumb-neon-crypto.s new file mode 100644 index 00000000000..096e9e81b10 --- /dev/null +++ b/test/MC/ARM/thumb-neon-crypto.s @@ -0,0 +1,35 @@ +@ RUN: llvm-mc -triple thumbv8 -mattr=+neon,+crypto -show-encoding < %s | FileCheck %s + +aesd.8 q0, q1 +@ CHECK: aesd.8 q0, q1 @ encoding: [0xb0,0xff,0x42,0x03] +aese.8 q0, q1 +@ CHECK: aese.8 q0, q1 @ encoding: [0xb0,0xff,0x02,0x03] +aesimc.8 q0, q1 +@ CHECK: aesimc.8 q0, q1 
@ encoding: [0xb0,0xff,0xc2,0x03] +aesmc.8 q0, q1 +@ CHECK: aesmc.8 q0, q1 @ encoding: [0xb0,0xff,0x82,0x03] + +sha1h.32 q0, q1 +@ CHECK: sha1h.32 q0, q1 @ encoding: [0xb9,0xff,0xc2,0x02] +sha1su1.32 q0, q1 +@ CHECK: sha1su1.32 q0, q1 @ encoding: [0xba,0xff,0x82,0x03] +sha256su0.32 q0, q1 +@ CHECK: sha256su0.32 q0, q1 @ encoding: [0xba,0xff,0xc2,0x03] + +sha1c.32 q0, q1, q2 +@ CHECK: sha1c.32 q0, q1, q2 @ encoding: [0x02,0xef,0x44,0x0c] +sha1m.32 q0, q1, q2 +@ CHECK: sha1m.32 q0, q1, q2 @ encoding: [0x22,0xef,0x44,0x0c] +sha1p.32 q0, q1, q2 +@ CHECK: sha1p.32 q0, q1, q2 @ encoding: [0x12,0xef,0x44,0x0c] +sha1su0.32 q0, q1, q2 +@ CHECK: sha1su0.32 q0, q1, q2 @ encoding: [0x32,0xef,0x44,0x0c] +sha256h.32 q0, q1, q2 +@ CHECK: sha256h.32 q0, q1, q2 @ encoding: [0x02,0xff,0x44,0x0c] +sha256h2.32 q0, q1, q2 +@ CHECK: sha256h2.32 q0, q1, q2 @ encoding: [0x12,0xff,0x44,0x0c] +sha256su1.32 q0, q1, q2 +@ CHECK: sha256su1.32 q0, q1, q2 @ encoding: [0x22,0xff,0x44,0x0c] + +vmull.p64 q8, d16, d17 +@ CHECK: vmull.p64 q8, d16, d17 @ encoding: [0xe0,0xef,0xa1,0x0e] diff --git a/test/MC/Disassembler/ARM/neon-crypto.txt b/test/MC/Disassembler/ARM/neon-crypto.txt new file mode 100644 index 00000000000..086c781991d --- /dev/null +++ b/test/MC/Disassembler/ARM/neon-crypto.txt @@ -0,0 +1,35 @@ +# RUN: llvm-mc -triple armv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s + +0x42,0x03,0xb0,0xf3 +# CHECK: aesd.8 q0, q1 +0x02,0x03,0xb0,0xf3 +# CHECK: aese.8 q0, q1 +0xc2,0x03,0xb0,0xf3 +# CHECK: aesimc.8 q0, q1 +0x82,0x03,0xb0,0xf3 +# CHECK: aesmc.8 q0, q1 + +0xc2,0x02,0xb9,0xf3 +# CHECK: sha1h.32 q0, q1 +0x82,0x03,0xba,0xf3 +# CHECK: sha1su1.32 q0, q1 +0xc2,0x03,0xba,0xf3 +# CHECK: sha256su0.32 q0, q1 + +0x44,0x0c,0x02,0xf2 +# CHECK: sha1c.32 q0, q1, q2 +0x44,0x0c,0x22,0xf2 +# CHECK: sha1m.32 q0, q1, q2 +0x44,0x0c,0x12,0xf2 +# CHECK: sha1p.32 q0, q1, q2 +0x44,0x0c,0x32,0xf2 +# CHECK: sha1su0.32 q0, q1, q2 +0x44,0x0c,0x02,0xf3 +# CHECK: sha256h.32 q0, q1, q2 
+0x44,0x0c,0x12,0xf3 +# CHECK: sha256h2.32 q0, q1, q2 +0x44,0x0c,0x22,0xf3 +# CHECK: sha256su1.32 q0, q1, q2 + +0xa1,0x0e,0xe0,0xf2 +# CHECK: vmull.p64 q8, d16, d17 diff --git a/test/MC/Disassembler/ARM/thumb-neon-crypto.txt b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt new file mode 100644 index 00000000000..30b5fb6695a --- /dev/null +++ b/test/MC/Disassembler/ARM/thumb-neon-crypto.txt @@ -0,0 +1,35 @@ +# RUN: llvm-mc -triple thumbv8-unknown-unknown -mattr=+neon,+crypto -disassemble < %s | FileCheck %s + +0xb0 0xff 0x42 0x03 +# CHECK: aesd.8 q0, q1 +0xb0 0xff 0x02 0x03 +# CHECK: aese.8 q0, q1 +0xb0 0xff 0xc2 0x03 +# CHECK: aesimc.8 q0, q1 +0xb0 0xff 0x82 0x03 +# CHECK: aesmc.8 q0, q1 + +0xb9 0xff 0xc2 0x02 +# CHECK: sha1h.32 q0, q1 +0xba 0xff 0x82 0x03 +# CHECK: sha1su1.32 q0, q1 +0xba 0xff 0xc2 0x03 +# CHECK: sha256su0.32 q0, q1 + +0x02 0xef 0x44 0x0c +# CHECK: sha1c.32 q0, q1, q2 +0x22 0xef 0x44 0x0c +# CHECK: sha1m.32 q0, q1, q2 +0x12 0xef 0x44 0x0c +# CHECK: sha1p.32 q0, q1, q2 +0x32 0xef 0x44 0x0c +# CHECK: sha1su0.32 q0, q1, q2 +0x02 0xff 0x44 0x0c +# CHECK: sha256h.32 q0, q1, q2 +0x12 0xff 0x44 0x0c +# CHECK: sha256h2.32 q0, q1, q2 +0x22 0xff 0x44 0x0c +# CHECK: sha256su1.32 q0, q1, q2 + +0xe0 0xef 0xa1 0x0e +# CHECK: vmull.p64 q8, d16, d17