Add LLVM support for PPC cryptography builtins

Review: http://reviews.llvm.org/D7955


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231285 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Nemanja Ivanovic
Date: 2015-03-04 20:44:33 +00:00
Parent: c56226c6d1
Commit: b69d556c37
14 changed files with 539 additions and 3 deletions


@@ -73,7 +73,7 @@ class PowerPC_Vec_FFF_Intrinsic<string GCCIntSuffix>
[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty],
[IntrNoMem]>;
-/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16f8
+/// PowerPC_Vec_BBB_Intrinsic - A PowerPC intrinsic that takes two v16i8
/// vectors and returns one. These intrinsics have no side effects.
class PowerPC_Vec_BBB_Intrinsic<string GCCIntSuffix>
: PowerPC_Vec_Intrinsic<GCCIntSuffix,
@@ -126,7 +126,6 @@ class PowerPC_VSX_Sca_DDD_Intrinsic<string GCCIntSuffix>
[llvm_double_ty], [llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
//===----------------------------------------------------------------------===//
// PowerPC Altivec Intrinsic Definitions.
@@ -559,6 +558,41 @@ def int_ppc_altivec_vlogefp : PowerPC_Vec_FF_Intrinsic<"vlogefp">;
def int_ppc_altivec_vrefp : PowerPC_Vec_FF_Intrinsic<"vrefp">;
def int_ppc_altivec_vrsqrtefp : PowerPC_Vec_FF_Intrinsic<"vrsqrtefp">;
// Power8 Intrinsics
// Crypto
let TargetPrefix = "ppc" in { // All PPC intrinsics start with "llvm.ppc.".
def int_ppc_altivec_crypto_vsbox :
GCCBuiltin<"__builtin_altivec_crypto_vsbox">,
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vpermxor :
GCCBuiltin<"__builtin_altivec_crypto_vpermxor">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
// These need diagnostics for invalid arguments, so they don't inherit from GCCBuiltin.
def int_ppc_altivec_crypto_vshasigmad :
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_ppc_altivec_crypto_vshasigmaw :
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
}
def int_ppc_altivec_crypto_vcipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipher">;
def int_ppc_altivec_crypto_vcipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vcipherlast">;
def int_ppc_altivec_crypto_vncipher :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipher">;
def int_ppc_altivec_crypto_vncipherlast :
PowerPC_Vec_DDD_Intrinsic<"crypto_vncipherlast">;
def int_ppc_altivec_crypto_vpmsumb :
PowerPC_Vec_BBB_Intrinsic<"crypto_vpmsumb">;
def int_ppc_altivec_crypto_vpmsumh :
PowerPC_Vec_HHH_Intrinsic<"crypto_vpmsumh">;
def int_ppc_altivec_crypto_vpmsumw :
PowerPC_Vec_WWW_Intrinsic<"crypto_vpmsumw">;
def int_ppc_altivec_crypto_vpmsumd :
PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">;
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
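A note on the two SHA intrinsics above: vshasigmad and vshasigmaw take their ST and SIX control fields as plain i32 arguments, but these map to u1imm/u4imm instruction operands (see the VXCR_Int_Ty pattern below), so calls must pass compile-time constants in range (ST in [0,1], SIX in [0,15]); that is why they need front-end diagnostics rather than a GCCBuiltin mapping. A minimal IR sketch, mirroring the new test file (the function name is illustrative):

; ST and SIX must be constant and in range; non-constant or out-of-range
; values cannot be encoded and should be rejected by front-end diagnostics.
define <2 x i64> @sigma_example(<2 x i64> %x) {
entry:
  %r = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %x, i32 1, i32 15)
  ret <2 x i64> %r
}
declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32)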


@@ -425,6 +425,7 @@ public:
bool isToken() const override { return Kind == Token; }
bool isImm() const override { return Kind == Immediate || Kind == Expression; }
bool isU1Imm() const { return Kind == Immediate && isUInt<1>(getImm()); }
bool isU2Imm() const { return Kind == Immediate && isUInt<2>(getImm()); }
bool isU4Imm() const { return Kind == Immediate && isUInt<4>(getImm()); }
bool isU5Imm() const { return Kind == Immediate && isUInt<5>(getImm()); }


@@ -214,6 +214,13 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
assert(Value <= 1 && "Invalid u1imm argument!");
O << (unsigned int)Value;
}
void PPCInstPrinter::printU2ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();


@@ -43,6 +43,7 @@ public:
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS5ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);


@@ -112,6 +112,9 @@ def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
def FeatureP8Altivec : SubtargetFeature<"power8-altivec", "HasP8Altivec", "true",
"Enable POWER8 Altivec instructions",
[FeatureAltivec]>;
def FeatureP8Crypto : SubtargetFeature<"crypto", "HasP8Crypto", "true",
"Enable POWER8 Crypto instructions",
[FeatureP8Altivec]>;
def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
"Enable POWER8 vector instructions",
[FeatureVSX, FeatureP8Altivec]>;
@@ -258,7 +261,7 @@ def ProcessorFeatures {
FeatureFRE, FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
-FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX,
+FeaturePOPCNTD, FeatureCMPB, FeatureLDBRX, FeatureP8Crypto,
Feature64Bit /*, Feature64BitRegs */, FeatureICBT,
DeprecatedMFTB, DeprecatedDST];
}
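FeatureP8Crypto implies FeatureP8Altivec (and, transitively, FeatureAltivec), and the POWER8 processor-feature list above now enables it by default. A pre-POWER8 CPU can still opt in explicitly, as the new test's RUN lines do, e.g. llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto.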


@@ -269,6 +269,16 @@ class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
!strconcat(opc, " $vD, $vB"), IIC_VecFP,
[(set OutTy:$vD, (IntID InTy:$vB))]>;
class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
!strconcat(opc, " $vD, $vA"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA))]>;
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
: VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
!strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
[(set Ty:$vD, (IntID Ty:$vA, imm:$ST, imm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -939,6 +949,7 @@ def : Pat<(v4f32 (fnearbyint v4f32:$vA)),
} // end HasAltivec
def HasP8Altivec : Predicate<"PPCSubTarget->hasP8Altivec()">;
def HasP8Crypto : Predicate<"PPCSubTarget->hasP8Crypto()">;
let Predicates = [HasP8Altivec] in {
let isCommutable = 1 in {
@@ -1035,4 +1046,33 @@ def VCMPGTSDo : VCMPo<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
def VCMPGTUDo : VCMPo<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
int_ppc_altivec_crypto_vpmsumb, v16i8>;
def VPMSUMH : VX1_Int_Ty<1096, "vpmsumh",
int_ppc_altivec_crypto_vpmsumh, v8i16>;
def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
int_ppc_altivec_crypto_vpermxor, v16i8>;
} // end HasP8Altivec
// Crypto instructions (from builtins)
let Predicates = [HasP8Crypto] in {
def VSHASIGMAW : VXCR_Int_Ty<1666, "vshasigmaw",
int_ppc_altivec_crypto_vshasigmaw, v4i32>;
def VSHASIGMAD : VXCR_Int_Ty<1730, "vshasigmad",
int_ppc_altivec_crypto_vshasigmad, v2i64>;
def VCIPHER : VX1_Int_Ty<1288, "vcipher", int_ppc_altivec_crypto_vcipher,
v2i64>;
def VCIPHERLAST : VX1_Int_Ty<1289, "vcipherlast",
int_ppc_altivec_crypto_vcipherlast, v2i64>;
def VNCIPHER : VX1_Int_Ty<1352, "vncipher",
int_ppc_altivec_crypto_vncipher, v2i64>;
def VNCIPHERLAST : VX1_Int_Ty<1353, "vncipherlast",
int_ppc_altivec_crypto_vncipherlast, v2i64>;
def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
} // HasP8Crypto
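The predicate split above is deliberate: vpmsum* and vpermxor need only HasP8Altivec, while the AES and SHA instructions sit behind HasP8Crypto. A small IR sketch (function names are illustrative) exercising both groups; built with -mattr=+crypto both calls select their instructions, while the first would also work with just -mattr=+power8-altivec:

; vpmsumb requires only POWER8 Altivec; vshasigmaw additionally requires
; the crypto feature (which itself implies power8-altivec).
define <16 x i8> @poly_mul(<16 x i8> %a, <16 x i8> %b) {
entry:
  %r = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %r
}
define <4 x i32> @sigma0(<4 x i32> %x) {
entry:
  %r = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %x, i32 0, i32 0)
  ret <4 x i32> %r
}
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>)
declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32)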


@@ -1470,6 +1470,39 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
let Inst{21-31} = xo;
}
/// VXForm_CR - VX crypto instructions with "VRT, VRA, ST, SIX"
class VXForm_CR<bits<11> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
bits<5> VD;
bits<5> VA;
bits<1> ST;
bits<4> SIX;
let Pattern = pattern;
let Inst{6-10} = VD;
let Inst{11-15} = VA;
let Inst{16} = ST;
let Inst{17-20} = SIX;
let Inst{21-31} = xo;
}
/// VXForm_BX - VX crypto instructions with "VRT, VRA, 0" - like vsbox
class VXForm_BX<bits<11> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
bits<5> VD;
bits<5> VA;
let Pattern = pattern;
let Inst{6-10} = VD;
let Inst{11-15} = VA;
let Inst{16-20} = 0;
let Inst{21-31} = xo;
}
// E-4 VXR-Form
class VXRForm_1<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
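As a cross-check of the VXForm_CR layout above, here is a worked encoding for vshasigmaw 2, 3, 0, 11 (the operand values come from the disassembler test added below):

  opcode (bits 0-5)   = 4    -> 000100
  VD     (bits 6-10)  = 2    -> 00010
  VA     (bits 11-15) = 3    -> 00011
  ST     (bit 16)     = 0    -> 0
  SIX    (bits 17-20) = 11   -> 1011
  xo     (bits 21-31) = 1666 -> 11010000010

Concatenated, this is 0x10435E82, i.e. the big-endian byte sequence 0x10 0x43 0x5e 0x82 checked in the tests.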


@@ -446,6 +446,15 @@ def crrc : RegisterOperand<CRRC> {
let ParserMatchClass = PPCRegCRRCAsmOperand;
}
def PPCU1ImmAsmOperand : AsmOperandClass {
let Name = "U1Imm"; let PredicateMethod = "isU1Imm";
let RenderMethod = "addImmOperands";
}
def u1imm : Operand<i32> {
let PrintMethod = "printU1ImmOperand";
let ParserMatchClass = PPCU1ImmAsmOperand;
}
def PPCU2ImmAsmOperand : AsmOperandClass {
let Name = "U2Imm"; let PredicateMethod = "isU2Imm";
let RenderMethod = "addImmOperands";


@@ -69,6 +69,7 @@ void PPCSubtarget::initializeEnvironment() {
HasVSX = false;
HasP8Vector = false;
HasP8Altivec = false;
HasP8Crypto = false;
HasFCPSGN = false;
HasFSQRT = false;
HasFRE = false;


@@ -90,6 +90,7 @@ protected:
bool HasVSX;
bool HasP8Vector;
bool HasP8Altivec;
bool HasP8Crypto;
bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
@@ -218,6 +219,7 @@ public:
bool hasVSX() const { return HasVSX; }
bool hasP8Vector() const { return HasP8Vector; }
bool hasP8Altivec() const { return HasP8Altivec; }
bool hasP8Crypto() const { return HasP8Crypto; }
bool hasMFOCRF() const { return HasMFOCRF; }
bool hasISEL() const { return HasISEL; }
bool hasPOPCNTD() const { return HasPOPCNTD; }


@@ -211,6 +211,51 @@ vector float f(vector float a, vector float b) {
//===----------------------------------------------------------------------===//
We should do a little better at eliminating dead stores.
The stores to the stack are dead, since %a and %b are not otherwise needed:
; Function Attrs: nounwind
define <16 x i8> @test_vpmsumb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %2
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
Produces the following code with -mtriple=powerpc64-unknown-linux-gnu:
# BB#0: # %entry
addis 3, 2, .LCPI0_0@toc@ha
addis 4, 2, .LCPI0_1@toc@ha
addi 3, 3, .LCPI0_0@toc@l
addi 4, 4, .LCPI0_1@toc@l
lxvw4x 0, 0, 3
addi 3, 1, -16
lxvw4x 35, 0, 4
stxvw4x 0, 0, 3
ori 2, 2, 0
lxvw4x 34, 0, 3
addi 3, 1, -32
stxvw4x 35, 0, 3
vpmsumb 2, 2, 3
blr
.long 0
.quad 0
The two stxvw4x instructions are not needed.
With -mtriple=powerpc64le-unknown-linux-gnu, the associated permutes
are present too.
//===----------------------------------------------------------------------===//
The following example is found in test/CodeGen/PowerPC/vec_add_sub_doubleword.ll:
define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind {


@@ -0,0 +1,275 @@
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
; FIXME: The original intent was to add a CHECK-NEXT for the blr after every CHECK.
; However, this currently fails since we don't eliminate stores of the unused
; locals. These stores are sometimes scheduled after the crypto instruction.
; Function Attrs: nounwind
define <16 x i8> @test_vpmsumb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = call <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %2
; CHECK: vpmsumb 2,
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpmsumb(<16 x i8>, <16 x i8>) #1
; Function Attrs: nounwind
define <8 x i16> @test_vpmsumh() #0 {
entry:
%a = alloca <8 x i16>, align 16
%b = alloca <8 x i16>, align 16
store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
%0 = load <8 x i16>, <8 x i16>* %a, align 16
%1 = load <8 x i16>, <8 x i16>* %b, align 16
%2 = call <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %2
; CHECK: vpmsumh 2,
}
; Function Attrs: nounwind readnone
declare <8 x i16> @llvm.ppc.altivec.crypto.vpmsumh(<8 x i16>, <8 x i16>) #1
; Function Attrs: nounwind
define <4 x i32> @test_vpmsumw() #0 {
entry:
%a = alloca <4 x i32>, align 16
%b = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = load <4 x i32>, <4 x i32>* %b, align 16
%2 = call <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32> %0, <4 x i32> %1)
ret <4 x i32> %2
; CHECK: vpmsumw 2,
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.crypto.vpmsumw(<4 x i32>, <4 x i32>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vpmsumd() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vpmsumd 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vpmsumd(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vsbox() #0 {
entry:
%a = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = call <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64> %0)
ret <2 x i64> %1
; CHECK: vsbox 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vsbox(<2 x i64>) #1
; Function Attrs: nounwind
define <16 x i8> @test_vpermxorb() #0 {
entry:
%a = alloca <16 x i8>, align 16
%b = alloca <16 x i8>, align 16
%c = alloca <16 x i8>, align 16
store <16 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16>, <16 x i8>* %a, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %b, align 16
store <16 x i8> <i8 113, i8 114, i8 115, i8 116, i8 117, i8 118, i8 119, i8 120, i8 121, i8 122, i8 123, i8 124, i8 125, i8 126, i8 127, i8 112>, <16 x i8>* %c, align 16
%0 = load <16 x i8>, <16 x i8>* %a, align 16
%1 = load <16 x i8>, <16 x i8>* %b, align 16
%2 = load <16 x i8>, <16 x i8>* %c, align 16
%3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
ret <16 x i8> %3
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind readnone
declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>) #1
; Function Attrs: nounwind
define <8 x i16> @test_vpermxorh() #0 {
entry:
%a = alloca <8 x i16>, align 16
%b = alloca <8 x i16>, align 16
%c = alloca <8 x i16>, align 16
store <8 x i16> <i16 258, i16 772, i16 1286, i16 1800, i16 2314, i16 2828, i16 3342, i16 3856>, <8 x i16>* %a, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %b, align 16
store <8 x i16> <i16 29042, i16 29556, i16 30070, i16 30584, i16 31098, i16 31612, i16 32126, i16 32624>, <8 x i16>* %c, align 16
%0 = load <8 x i16>, <8 x i16>* %a, align 16
%1 = bitcast <8 x i16> %0 to <16 x i8>
%2 = load <8 x i16>, <8 x i16>* %b, align 16
%3 = bitcast <8 x i16> %2 to <16 x i8>
%4 = load <8 x i16>, <8 x i16>* %c, align 16
%5 = bitcast <8 x i16> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <8 x i16>
ret <8 x i16> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <4 x i32> @test_vpermxorw() #0 {
entry:
%a = alloca <4 x i32>, align 16
%b = alloca <4 x i32>, align 16
%c = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %b, align 16
store <4 x i32> <i32 1903326068, i32 1970698104, i32 2038070140, i32 2105442160>, <4 x i32>* %c, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = bitcast <4 x i32> %0 to <16 x i8>
%2 = load <4 x i32>, <4 x i32>* %b, align 16
%3 = bitcast <4 x i32> %2 to <16 x i8>
%4 = load <4 x i32>, <4 x i32>* %c, align 16
%5 = bitcast <4 x i32> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <4 x i32>
ret <4 x i32> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <2 x i64> @test_vpermxord() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
%c = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %c, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = bitcast <2 x i64> %0 to <16 x i8>
%2 = load <2 x i64>, <2 x i64>* %b, align 16
%3 = bitcast <2 x i64> %2 to <16 x i8>
%4 = load <2 x i64>, <2 x i64>* %c, align 16
%5 = bitcast <2 x i64> %4 to <16 x i8>
%6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
%7 = bitcast <16 x i8> %6 to <2 x i64>
ret <2 x i64> %7
; CHECK: vpermxor 2,
}
; Function Attrs: nounwind
define <2 x i64> @test_vcipher() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vcipher 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vcipher(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vcipherlast() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vcipherlast 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vcipherlast(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vncipher() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vncipher 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vncipher(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <2 x i64> @test_vncipherlast() #0 {
entry:
%a = alloca <2 x i64>, align 16
%b = alloca <2 x i64>, align 16
store <2 x i64> <i64 72623859790382856, i64 651345242494996240>, <2 x i64>* %a, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %b, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = load <2 x i64>, <2 x i64>* %b, align 16
%2 = call <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64> %0, <2 x i64> %1)
ret <2 x i64> %2
; CHECK: vncipherlast 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vncipherlast(<2 x i64>, <2 x i64>) #1
; Function Attrs: nounwind
define <4 x i32> @test_vshasigmaw() #0 {
entry:
%a = alloca <4 x i32>, align 16
store <4 x i32> <i32 16909060, i32 84281096, i32 151653132, i32 219025168>, <4 x i32>* %a, align 16
%0 = load <4 x i32>, <4 x i32>* %a, align 16
%1 = call <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32> %0, i32 1, i32 15)
ret <4 x i32> %1
; CHECK: vshasigmaw 2,
}
; Function Attrs: nounwind readnone
declare <4 x i32> @llvm.ppc.altivec.crypto.vshasigmaw(<4 x i32>, i32, i32) #1
; Function Attrs: nounwind
define <2 x i64> @test_vshasigmad() #0 {
entry:
%a = alloca <2 x i64>, align 16
store <2 x i64> <i64 8174723217654970232, i64 8753444600359583600>, <2 x i64>* %a, align 16
%0 = load <2 x i64>, <2 x i64>* %a, align 16
%1 = call <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64> %0, i32 1, i32 15)
ret <2 x i64> %1
; CHECK: vshasigmad 2,
}
; Function Attrs: nounwind readnone
declare <2 x i64> @llvm.ppc.altivec.crypto.vshasigmad(<2 x i64>, i32, i32) #1
attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.0 (trunk 230949) (llvm/trunk 230946)"}


@@ -120,6 +120,42 @@
# CHECK: vperm 2, 3, 4, 5
0x10 0x43 0x21 0x6b
# CHECK: vpermxor 2, 3, 4, 5
0x10 0x43 0x21 0x6d
# CHECK: vsbox 2, 5
0x10 0x45 0x05 0xc8
# CHECK: vcipher 2, 5, 17
0x10 0x45 0x8d 0x08
# CHECK: vcipherlast 2, 5, 17
0x10 0x45 0x8d 0x09
# CHECK: vncipher 2, 5, 17
0x10 0x45 0x8d 0x48
# CHECK: vncipherlast 2, 5, 17
0x10 0x45 0x8d 0x49
# CHECK: vpmsumb 2, 5, 17
0x10 0x45 0x8c 0x08
# CHECK: vpmsumh 2, 5, 17
0x10 0x45 0x8c 0x48
# CHECK: vpmsumw 2, 5, 17
0x10 0x45 0x8c 0x88
# CHECK: vpmsumd 2, 5, 17
0x10 0x45 0x8c 0xc8
# CHECK: vshasigmaw 2, 3, 0, 11
0x10 0x43 0x5e 0x82
# CHECK: vshasigmad 2, 3, 1, 15
0x10 0x43 0xfe 0xc2
# CHECK: vsel 2, 3, 4, 5
0x10 0x43 0x21 0x6a


@@ -133,6 +133,55 @@
# CHECK-BE: vperm 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6b]
# CHECK-LE: vperm 2, 3, 4, 5 # encoding: [0x6b,0x21,0x43,0x10]
vperm 2, 3, 4, 5
# CHECK-BE: vpermxor 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6d]
# CHECK-LE: vpermxor 2, 3, 4, 5 # encoding: [0x6d,0x21,0x43,0x10]
vpermxor 2, 3, 4, 5
# CHECK-BE: vsbox 2, 5 # encoding: [0x10,0x45,0x05,0xc8]
# CHECK-LE: vsbox 2, 5 # encoding: [0xc8,0x05,0x45,0x10]
vsbox 2, 5
# CHECK-BE: vcipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x08]
# CHECK-LE: vcipher 2, 5, 17 # encoding: [0x08,0x8d,0x45,0x10]
vcipher 2, 5, 17
# CHECK-BE: vcipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x09]
# CHECK-LE: vcipherlast 2, 5, 17 # encoding: [0x09,0x8d,0x45,0x10]
vcipherlast 2, 5, 17
# CHECK-BE: vncipher 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x48]
# CHECK-LE: vncipher 2, 5, 17 # encoding: [0x48,0x8d,0x45,0x10]
vncipher 2, 5, 17
# CHECK-BE: vncipherlast 2, 5, 17 # encoding: [0x10,0x45,0x8d,0x49]
# CHECK-LE: vncipherlast 2, 5, 17 # encoding: [0x49,0x8d,0x45,0x10]
vncipherlast 2, 5, 17
# CHECK-BE: vpmsumb 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x08]
# CHECK-LE: vpmsumb 2, 5, 17 # encoding: [0x08,0x8c,0x45,0x10]
vpmsumb 2, 5, 17
# CHECK-BE: vpmsumh 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x48]
# CHECK-LE: vpmsumh 2, 5, 17 # encoding: [0x48,0x8c,0x45,0x10]
vpmsumh 2, 5, 17
# CHECK-BE: vpmsumw 2, 5, 17 # encoding: [0x10,0x45,0x8c,0x88]
# CHECK-LE: vpmsumw 2, 5, 17 # encoding: [0x88,0x8c,0x45,0x10]
vpmsumw 2, 5, 17
# CHECK-BE: vpmsumd 2, 5, 17 # encoding: [0x10,0x45,0x8c,0xc8]
# CHECK-LE: vpmsumd 2, 5, 17 # encoding: [0xc8,0x8c,0x45,0x10]
vpmsumd 2, 5, 17
# CHECK-BE: vshasigmaw 2, 3, 0, 11 # encoding: [0x10,0x43,0x5e,0x82]
# CHECK-LE: vshasigmaw 2, 3, 0, 11 # encoding: [0x82,0x5e,0x43,0x10]
vshasigmaw 2, 3, 0, 11
# CHECK-BE: vshasigmad 2, 3, 1, 15 # encoding: [0x10,0x43,0xfe,0xc2]
# CHECK-LE: vshasigmad 2, 3, 1, 15 # encoding: [0xc2,0xfe,0x43,0x10]
vshasigmad 2, 3, 1, 15
# CHECK-BE: vsel 2, 3, 4, 5 # encoding: [0x10,0x43,0x21,0x6a]
# CHECK-LE: vsel 2, 3, 4, 5 # encoding: [0x6a,0x21,0x43,0x10]
vsel 2, 3, 4, 5