mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-27 06:35:30 +00:00
AVX-512: Added SHIFT instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188899 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
df40f8e8ad
commit
8ba76daba0
@ -11269,6 +11269,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
case Intrinsic::x86_avx512_kortestz:
|
||||
case Intrinsic::x86_avx512_kortestc: {
|
||||
unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz)? X86::COND_E: X86::COND_B;
|
||||
SDValue LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(1));
|
||||
SDValue RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i1, Op.getOperand(2));
|
||||
SDValue CC = DAG.getConstant(X86CC, MVT::i8);
|
||||
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
|
||||
SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
|
||||
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
|
||||
}
|
||||
|
||||
// SSE/AVX shift intrinsics
|
||||
case Intrinsic::x86_sse2_psll_w:
|
||||
@ -12135,7 +12145,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
|
||||
(Subtarget->hasInt256() &&
|
||||
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16))) {
|
||||
(VT == MVT::v4i64 || VT == MVT::v8i32 || VT == MVT::v16i16)) ||
|
||||
(Subtarget->hasAVX512() &&
|
||||
(VT == MVT::v8i64 || VT == MVT::v16i32))) {
|
||||
if (Op.getOpcode() == ISD::SHL)
|
||||
return DAG.getNode(X86ISD::VSHLI, dl, VT, R,
|
||||
DAG.getConstant(ShiftAmt, MVT::i32));
|
||||
@ -12297,7 +12309,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
||||
VT == MVT::v4i32 || VT == MVT::v8i16 ||
|
||||
(Subtarget->hasInt256() &&
|
||||
((VT == MVT::v4i64 && Op.getOpcode() != ISD::SRA) ||
|
||||
VT == MVT::v8i32 || VT == MVT::v16i16))) {
|
||||
VT == MVT::v8i32 || VT == MVT::v16i16)) ||
|
||||
(Subtarget->hasAVX512() && (VT == MVT::v8i64 || VT == MVT::v16i32))) {
|
||||
SDValue BaseShAmt;
|
||||
EVT EltVT = VT.getVectorElementType();
|
||||
|
||||
@ -12365,6 +12378,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
||||
case MVT::v4i64:
|
||||
case MVT::v8i32:
|
||||
case MVT::v16i16:
|
||||
case MVT::v16i32:
|
||||
case MVT::v8i64:
|
||||
return getTargetVShiftNode(X86ISD::VSHLI, dl, VT, R, BaseShAmt, DAG);
|
||||
}
|
||||
case ISD::SRA:
|
||||
@ -12374,6 +12389,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
||||
case MVT::v8i16:
|
||||
case MVT::v8i32:
|
||||
case MVT::v16i16:
|
||||
case MVT::v16i32:
|
||||
case MVT::v8i64:
|
||||
return getTargetVShiftNode(X86ISD::VSRAI, dl, VT, R, BaseShAmt, DAG);
|
||||
}
|
||||
case ISD::SRL:
|
||||
@ -12385,6 +12402,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
||||
case MVT::v4i64:
|
||||
case MVT::v8i32:
|
||||
case MVT::v16i16:
|
||||
case MVT::v16i32:
|
||||
case MVT::v8i64:
|
||||
return getTargetVShiftNode(X86ISD::VSRLI, dl, VT, R, BaseShAmt, DAG);
|
||||
}
|
||||
}
|
||||
@ -12393,7 +12412,8 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
|
||||
|
||||
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
|
||||
if (!Subtarget->is64Bit() &&
|
||||
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64)) &&
|
||||
(VT == MVT::v2i64 || (Subtarget->hasInt256() && VT == MVT::v4i64) ||
|
||||
(Subtarget->hasAVX512() && VT == MVT::v8i64)) &&
|
||||
Amt.getOpcode() == ISD::BITCAST &&
|
||||
Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
|
||||
Amt = Amt.getOperand(0);
|
||||
@ -12442,6 +12462,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget,
|
||||
if (V.getNode())
|
||||
return V;
|
||||
|
||||
if (Subtarget->hasAVX512() && (VT == MVT::v16i32 || VT == MVT::v8i64))
|
||||
return Op;
|
||||
// AVX2 has VPSLLV/VPSRAV/VPSRLV.
|
||||
if (Subtarget->hasInt256()) {
|
||||
if (Op.getOpcode() == ISD::SRL &&
|
||||
@ -13350,6 +13372,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM";
|
||||
case X86ISD::PTEST: return "X86ISD::PTEST";
|
||||
case X86ISD::TESTP: return "X86ISD::TESTP";
|
||||
case X86ISD::TESTM: return "X86ISD::TESTM";
|
||||
case X86ISD::KORTEST: return "X86ISD::KORTEST";
|
||||
case X86ISD::KTEST: return "X86ISD::KTEST";
|
||||
case X86ISD::PALIGNR: return "X86ISD::PALIGNR";
|
||||
case X86ISD::PSHUFD: return "X86ISD::PSHUFD";
|
||||
case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW";
|
||||
|
@ -274,7 +274,7 @@ namespace llvm {
|
||||
|
||||
// PCMP* - Vector integer comparisons.
|
||||
PCMPEQ, PCMPGT,
|
||||
// PCMP*M - Vector integer comparisons, the result is in a mask vector
|
||||
// PCMP*M - Vector integer comparisons, the result is in a mask vector.
|
||||
PCMPEQM, PCMPGTM,
|
||||
|
||||
/// CMPM, CMPMU - Vector comparison generating mask bits for fp and
|
||||
@ -295,12 +295,15 @@ namespace llvm {
|
||||
// MUL_IMM - X86 specific multiply by immediate.
|
||||
MUL_IMM,
|
||||
|
||||
// PTEST - Vector bitwise comparisons
|
||||
// PTEST - Vector bitwise comparisons.
|
||||
PTEST,
|
||||
|
||||
// TESTP - Vector packed fp sign bitwise comparisons
|
||||
// TESTP - Vector packed fp sign bitwise comparisons.
|
||||
TESTP,
|
||||
|
||||
// TESTM - Vector "test" in AVX-512, the result is in a mask vector.
|
||||
TESTM,
|
||||
|
||||
// KORTEST, KTEST - OR/AND test for masks.
|
||||
KORTEST,
|
||||
KTEST,
|
||||
|
@ -1691,3 +1691,144 @@ defm VDIVPDZ : avx512_fp_packed<0x5E, "divpd", fdiv, VR512, v8f64, f512mem,
|
||||
SSE_ALU_ITINS_P.d, 0>,
|
||||
EVEX_V512, OpSize, VEX_W, EVEX_CD8<64, CD8VF>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 VPTESTM instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, RegisterClass KRC,
|
||||
RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag,
|
||||
SDNode OpNode, ValueType vt> {
|
||||
def rr : AVX5128I<opc, MRMSrcReg,
|
||||
(outs KRC:$dst), (ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2)))]>, EVEX_4V;
|
||||
def rm : AVX5128I<opc, MRMSrcMem,
|
||||
(outs KRC:$dst), (ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set KRC:$dst, (OpNode (vt RC:$src1),
|
||||
(bitconvert (memop_frag addr:$src2))))]>, EVEX_4V;
|
||||
}
|
||||
|
||||
defm VPTESTMDZ : avx512_vptest<0x27, "vptestmd", VK16, VR512, f512mem,
|
||||
memopv16i32, X86testm, v16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPTESTMQZ : avx512_vptest<0x27, "vptestmq", VK8, VR512, f512mem, memopv8i64,
|
||||
X86testm, v8i64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX-512 Shift instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
|
||||
string OpcodeStr,
|
||||
SDNode OpNode, RegisterClass RC, ValueType vt,
|
||||
X86MemOperand x86memop, PatFrag mem_frag> {
|
||||
def ri : AVX512BIi8<opc, ImmFormR, (outs RC:$dst),
|
||||
(ins RC:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (OpNode RC:$src1, (i32 imm:$src2))))],
|
||||
SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
|
||||
def mi: AVX512BIi8<opc, ImmFormM, (outs RC:$dst),
|
||||
(ins x86memop:$src1, i32i8imm:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (OpNode (mem_frag addr:$src1),
|
||||
(i32 imm:$src2)))], SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
|
||||
}
|
||||
|
||||
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
RegisterClass RC, ValueType vt, ValueType SrcVT,
|
||||
PatFrag bc_frag> {
|
||||
// src2 is always 128-bit (VR128X register or i128mem operand), whatever RC is.
|
||||
def rr : AVX512BI<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, VR128X:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (OpNode RC:$src1, (SrcVT VR128X:$src2))))],
|
||||
SSE_INTSHIFT_ITINS_P.rr>, EVEX_4V;
|
||||
def rm : AVX512BI<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst, (vt (OpNode RC:$src1,
|
||||
(bc_frag (memopv2i64 addr:$src2)))))],
|
||||
SSE_INTSHIFT_ITINS_P.rm>, EVEX_4V;
|
||||
}
|
||||
|
||||
defm VPSRLDZ : avx512_shift_rmi<0x72, MRM2r, MRM2m, "vpsrld", X86vsrli,
|
||||
VR512, v16i32, i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRLDZ : avx512_shift_rrm<0xD2, "vpsrld", X86vsrl,
|
||||
VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSRLQZ : avx512_shift_rmi<0x73, MRM2r, MRM2m, "vpsrlq", X86vsrli,
|
||||
VR512, v8i64, i512mem, memopv8i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSRLQZ : avx512_shift_rrm<0xD3, "vpsrlq", X86vsrl,
|
||||
VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
defm VPSLLDZ : avx512_shift_rmi<0x72, MRM6r, MRM6m, "vpslld", X86vshli,
|
||||
VR512, v16i32, i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSLLDZ : avx512_shift_rrm<0xF2, "vpslld", X86vshl,
|
||||
VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSLLQZ : avx512_shift_rmi<0x73, MRM6r, MRM6m, "vpsllq", X86vshli,
|
||||
VR512, v8i64, i512mem, memopv8i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSLLQZ : avx512_shift_rrm<0xF3, "vpsllq", X86vshl,
|
||||
VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
defm VPSRADZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsrad", X86vsrai,
|
||||
VR512, v16i32, i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRADZ : avx512_shift_rrm<0xE2, "vpsrad", X86vsra,
|
||||
VR512, v16i32, v4i32, bc_v4i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VQ>;
|
||||
|
||||
defm VPSRAQZ : avx512_shift_rmi<0x72, MRM4r, MRM4m, "vpsraq", X86vsrai,
|
||||
VR512, v8i64, i512mem, memopv8i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VF>, VEX_W;
|
||||
defm VPSRAQZ : avx512_shift_rrm<0xE2, "vpsraq", X86vsra,
|
||||
VR512, v8i64, v2i64, bc_v2i64>, EVEX_V512,
|
||||
EVEX_CD8<64, CD8VQ>, VEX_W;
|
||||
|
||||
//===-------------------------------------------------------------------===//
|
||||
// Variable Bit Shifts
|
||||
//===-------------------------------------------------------------------===//
|
||||
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
RegisterClass RC, ValueType vt,
|
||||
X86MemOperand x86memop, PatFrag mem_frag> {
|
||||
def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(vt (OpNode RC:$src1, (vt RC:$src2))))]>,
|
||||
EVEX_4V;
|
||||
def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
|
||||
[(set RC:$dst,
|
||||
(vt (OpNode RC:$src1, (mem_frag addr:$src2))))]>,
|
||||
EVEX_4V;
|
||||
}
|
||||
|
||||
defm VPSLLVDZ : avx512_var_shift<0x47, "vpsllvd", shl, VR512, v16i32,
|
||||
i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSLLVQZ : avx512_var_shift<0x47, "vpsllvq", shl, VR512, v8i64,
|
||||
i512mem, memopv8i64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
defm VPSRLVDZ : avx512_var_shift<0x45, "vpsrlvd", srl, VR512, v16i32,
|
||||
i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRLVQZ : avx512_var_shift<0x45, "vpsrlvq", srl, VR512, v8i64,
|
||||
i512mem, memopv8i64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
defm VPSRAVDZ : avx512_var_shift<0x46, "vpsravd", sra, VR512, v16i32,
|
||||
i512mem, memopv16i32>, EVEX_V512,
|
||||
EVEX_CD8<32, CD8VF>;
|
||||
defm VPSRAVQZ : avx512_var_shift<0x46, "vpsravq", sra, VR512, v8i64,
|
||||
i512mem, memopv8i64>, EVEX_V512, VEX_W,
|
||||
EVEX_CD8<64, CD8VF>;
|
||||
|
@ -149,6 +149,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
|
||||
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
|
||||
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
|
||||
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
|
||||
def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>>;
|
||||
|
||||
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
|
||||
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
|
||||
|
108
test/CodeGen/X86/avx512-shift.ll
Normal file
108
test/CodeGen/X86/avx512-shift.ll
Normal file
@ -0,0 +1,108 @@
|
||||
;RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
|
||||
|
||||
;CHECK-LABEL: shift_16_i32
|
||||
;CHECK: vpsrld
|
||||
;CHECK: vpslld
|
||||
;CHECK: vpsrad
|
||||
;CHECK: ret
|
||||
define <16 x i32> @shift_16_i32(<16 x i32> %a) {
|
||||
%b = lshr <16 x i32> %a, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
|
||||
%c = shl <16 x i32> %b, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
|
||||
%d = ashr <16 x i32> %c, <i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12, i32 12>
|
||||
ret <16 x i32> %d;
|
||||
}
|
||||
|
||||
;CHECK-LABEL: shift_8_i64
|
||||
;CHECK: vpsrlq
|
||||
;CHECK: vpsllq
|
||||
;CHECK: vpsraq
|
||||
;CHECK: ret
|
||||
define <8 x i64> @shift_8_i64(<8 x i64> %a) {
|
||||
%b = lshr <8 x i64> %a, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
|
||||
%c = shl <8 x i64> %b, <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12>
|
||||
%d = ashr <8 x i64> %c, <i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12, i64 12>
|
||||
ret <8 x i64> %d;
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_shl4
|
||||
; CHECK: vpsllvq %zmm
|
||||
; CHECK: ret
|
||||
define <8 x i64> @variable_shl4(<8 x i64> %x, <8 x i64> %y) {
|
||||
%k = shl <8 x i64> %x, %y
|
||||
ret <8 x i64> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_shl5
|
||||
; CHECK: vpsllvd %zmm
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_shl5(<16 x i32> %x, <16 x i32> %y) {
|
||||
%k = shl <16 x i32> %x, %y
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_srl0
|
||||
; CHECK: vpsrlvd
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_srl0(<16 x i32> %x, <16 x i32> %y) {
|
||||
%k = lshr <16 x i32> %x, %y
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_srl2
|
||||
; CHECK: vpsrlvq
|
||||
; CHECK: ret
|
||||
define <8 x i64> @variable_srl2(<8 x i64> %x, <8 x i64> %y) {
|
||||
%k = lshr <8 x i64> %x, %y
|
||||
ret <8 x i64> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_sra1
|
||||
; CHECK: vpsravd
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_sra1(<16 x i32> %x, <16 x i32> %y) {
|
||||
%k = ashr <16 x i32> %x, %y
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_sra2
|
||||
; CHECK: vpsravq %zmm
|
||||
; CHECK: ret
|
||||
define <8 x i64> @variable_sra2(<8 x i64> %x, <8 x i64> %y) {
|
||||
%k = ashr <8 x i64> %x, %y
|
||||
ret <8 x i64> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_sra01_load
|
||||
; CHECK: vpsravd (%
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_sra01_load(<16 x i32> %x, <16 x i32>* %y) {
|
||||
%y1 = load <16 x i32>* %y
|
||||
%k = ashr <16 x i32> %x, %y1
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_shl1_load
|
||||
; CHECK: vpsllvd (%
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_shl1_load(<16 x i32> %x, <16 x i32>* %y) {
|
||||
%y1 = load <16 x i32>* %y
|
||||
%k = shl <16 x i32> %x, %y1
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
; CHECK-LABEL: variable_srl0_load
|
||||
; CHECK: vpsrlvd (%
|
||||
; CHECK: ret
|
||||
define <16 x i32> @variable_srl0_load(<16 x i32> %x, <16 x i32>* %y) {
|
||||
%y1 = load <16 x i32>* %y
|
||||
%k = lshr <16 x i32> %x, %y1
|
||||
ret <16 x i32> %k
|
||||
}
|
||||
|
||||
; CHECK-LABEL: variable_srl3_load
|
||||
; CHECK: vpsrlvq (%
|
||||
; CHECK: ret
|
||||
define <8 x i64> @variable_srl3_load(<8 x i64> %x, <8 x i64>* %y) {
|
||||
%y1 = load <8 x i64>* %y
|
||||
%k = lshr <8 x i64> %x, %y1
|
||||
ret <8 x i64> %k
|
||||
}
|
@ -19,3 +19,19 @@ vextracti64x4 $1, %zmm9, %ymm17
|
||||
// CHECK: vextracti64x4
|
||||
// CHECK: encoding: [0x62,0x73,0xfd,0x48,0x3b,0x4f,0x10,0x01]
|
||||
vextracti64x4 $1, %zmm9, 512(%rdi)
|
||||
|
||||
// CHECK: vpsrad
|
||||
// CHECK: encoding: [0x62,0xb1,0x35,0x40,0x72,0xe1,0x02]
|
||||
vpsrad $2, %zmm17, %zmm25
|
||||
|
||||
// CHECK: vpsrad
|
||||
// CHECK: encoding: [0x62,0xf1,0x35,0x40,0x72,0x64,0xb7,0x08,0x02]
|
||||
vpsrad $2, 512(%rdi, %rsi, 4), %zmm25
|
||||
|
||||
// CHECK: vpsrad
|
||||
// CHECK: encoding: [0x62,0x21,0x1d,0x48,0xe2,0xc9]
|
||||
vpsrad %xmm17, %zmm12, %zmm25
|
||||
|
||||
// CHECK: vpsrad
|
||||
// CHECK: encoding: [0x62,0x61,0x1d,0x48,0xe2,0x4c,0xb7,0x20]
|
||||
vpsrad 512(%rdi, %rsi, 4), %zmm12, %zmm25
|
||||
|
Loading…
Reference in New Issue
Block a user