mirror of
https://github.com/RPCS3/llvm.git
synced 2024-11-28 22:20:43 +00:00
Add a bunch more X86 AVX2 instructions and their corresponding intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143529 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3eae0c2fa6
commit
3f2b2c218f
@ -1525,6 +1525,110 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
llvm_v16i16_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Absolute value ops: one intrinsic per element width, each taking a single
// 256-bit integer vector and returning a vector of the same type.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // <32 x i8> -> <32 x i8>
  def int_x86_avx2_pabs_b : GCCBuiltin<"__builtin_ia32_pabsb256">,
              Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty], [IntrNoMem]>;
  // <16 x i16> -> <16 x i16>
  def int_x86_avx2_pabs_w : GCCBuiltin<"__builtin_ia32_pabsw256">,
              Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty], [IntrNoMem]>;
  // <8 x i32> -> <8 x i32>
  def int_x86_avx2_pabs_d : GCCBuiltin<"__builtin_ia32_pabsd256">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty], [IntrNoMem]>;
}
|
||||
|
||||
// Horizontal arithmetic ops. None of these is marked Commutative.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // Horizontal adds: both inputs and the result share one vector type.
  def int_x86_avx2_phadd_w : GCCBuiltin<"__builtin_ia32_phaddw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
  def int_x86_avx2_phadd_d : GCCBuiltin<"__builtin_ia32_phaddd256">,
              Intrinsic<[llvm_v8i32_ty],
                        [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
  def int_x86_avx2_phadd_sw : GCCBuiltin<"__builtin_ia32_phaddsw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;

  // Horizontal subtracts: same signatures as the adds above.
  def int_x86_avx2_phsub_w : GCCBuiltin<"__builtin_ia32_phsubw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
  def int_x86_avx2_phsub_d : GCCBuiltin<"__builtin_ia32_phsubd256">,
              Intrinsic<[llvm_v8i32_ty],
                        [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
  def int_x86_avx2_phsub_sw : GCCBuiltin<"__builtin_ia32_phsubsw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;

  // Two <32 x i8> inputs, <16 x i16> result.
  def int_x86_avx2_pmadd_ub_sw : GCCBuiltin<"__builtin_ia32_pmaddubsw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
}
|
||||
|
||||
// Sign ops: two inputs and a result of the same 256-bit integer vector type.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // <32 x i8>
  def int_x86_avx2_psign_b : GCCBuiltin<"__builtin_ia32_psignb256">,
              Intrinsic<[llvm_v32i8_ty],
                        [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
  // <16 x i16>
  def int_x86_avx2_psign_w : GCCBuiltin<"__builtin_ia32_psignw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty], [IntrNoMem]>;
  // <8 x i32>
  def int_x86_avx2_psign_d : GCCBuiltin<"__builtin_ia32_psignd256">,
              Intrinsic<[llvm_v8i32_ty],
                        [llvm_v8i32_ty, llvm_v8i32_ty], [IntrNoMem]>;
}
|
||||
|
||||
// Packed multiply high with round and scale
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // <16 x i16> x <16 x i16> -> <16 x i16>; declared Commutative.
  def int_x86_avx2_pmul_hr_sw : GCCBuiltin<"__builtin_ia32_pmulhrsw256">,
              Intrinsic<[llvm_v16i16_ty],
                        [llvm_v16i16_ty, llvm_v16i16_ty],
                        [IntrNoMem, Commutative]>;
}
|
||||
|
||||
// Vector sign and zero extend. Every intrinsic here takes a 128-bit integer
// vector type and returns a 256-bit integer vector type.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // pmovsx* variants.
  def int_x86_avx2_pmovsxbd : GCCBuiltin<"__builtin_ia32_pmovsxbd256">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovsxbq : GCCBuiltin<"__builtin_ia32_pmovsxbq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovsxbw : GCCBuiltin<"__builtin_ia32_pmovsxbw256">,
              Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovsxdq : GCCBuiltin<"__builtin_ia32_pmovsxdq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovsxwd : GCCBuiltin<"__builtin_ia32_pmovsxwd256">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovsxwq : GCCBuiltin<"__builtin_ia32_pmovsxwq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;

  // pmovzx* variants, with the same signatures as their pmovsx counterparts.
  def int_x86_avx2_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd256">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovzxbw : GCCBuiltin<"__builtin_ia32_pmovzxbw256">,
              Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd256">,
              Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty], [IntrNoMem]>;
  def int_x86_avx2_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq256">,
              Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty], [IntrNoMem]>;
}
|
||||
|
||||
// Misc.
let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
  // <32 x i8> -> i32
  def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,
              Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty], [IntrNoMem]>;
  // <32 x i8>, <32 x i8> -> <32 x i8>
  def int_x86_avx2_pshuf_b : GCCBuiltin<"__builtin_ia32_pshufb256">,
              Intrinsic<[llvm_v32i8_ty],
                        [llvm_v32i8_ty, llvm_v32i8_ty], [IntrNoMem]>;
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MMX
|
||||
|
||||
|
@ -276,11 +276,12 @@ def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>;
|
||||
def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
|
||||
|
||||
// 256-bit memop pattern fragments
|
||||
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
|
||||
def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
|
||||
def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
|
||||
def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>;
|
||||
def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>;
|
||||
def memopv16i16 : PatFrag<(ops node:$ptr), (v16i16 (memop node:$ptr))>;
|
||||
def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>;
|
||||
|
||||
// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
|
||||
// 16-byte boundary.
|
||||
@ -326,6 +327,8 @@ def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
|
||||
def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
|
||||
|
||||
// 256-bit bitconvert pattern fragments
|
||||
def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
|
||||
def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
|
||||
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
|
||||
def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
|
||||
|
||||
|
@ -4008,6 +4008,23 @@ def mi : Ii8<0x70, MRMSrcMem,
|
||||
(bc_frag (memopv2i64 addr:$src1)),
|
||||
(undef))))]>;
|
||||
}
|
||||
|
||||
/// sse2_pshuffle_y - VR256 shuffle with an 8-bit immediate, opcode 0x70.
/// Yri takes the shuffled value from a register. Ymi takes it from a 256-bit
/// memory operand that is loaded as v4i64 and cast through bc_frag. In both
/// forms the second shuffle input is undef and $src2 is the immediate bound
/// to pshuf_frag.
multiclass sse2_pshuffle_y<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
                           PatFrag bc_frag> {
  // Register source.
  def Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
                (ins VR256:$src1, i8imm:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst,
                      (vt (pshuf_frag:$src2 VR256:$src1, (undef))))]>;

  // Memory source.
  def Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
                (ins i256mem:$src1, i8imm:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst,
                      (vt (pshuf_frag:$src2 (bc_frag (memopv4i64 addr:$src1)),
                                            (undef))))]>;
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
@ -4052,6 +4069,20 @@ let Predicates = [HasAVX] in {
|
||||
(VPSHUFLWmi addr:$src, imm:$imm)>;
|
||||
}
|
||||
|
||||
// 256-bit immediate shuffles built from sse2_pshuffle_y; gated on HasAVX2.
let Predicates = [HasAVX2] in {
  // vpshufd: TB opcode map with the OpSize prefix.
  let AddedComplexity = 5 in
  defm VPSHUFD  : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>,
                  TB, OpSize, VEX;

  // vpshufhw: XS prefix.
  defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>,
                  XS, VEX;

  // vpshuflw: XD prefix.
  defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>,
                  XD, VEX;
}
|
||||
|
||||
let Predicates = [HasSSE2] in {
|
||||
let AddedComplexity = 5 in
|
||||
defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
|
||||
@ -4114,6 +4145,19 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
|
||||
addr:$src2))))]>;
|
||||
}
|
||||
|
||||
/// sse2_unpack_y - Two-source VR256 unpack selected from the SDNode OpNode.
/// Yrr takes both sources in registers. Yrm takes the second source from a
/// 256-bit memory operand that is loaded as v4i64 and cast through bc_frag.
multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
                         SDNode OpNode, PatFrag bc_frag> {
  // Register, register.
  def Yrr : PDI<opc, MRMSrcReg, (outs VR256:$dst),
                (ins VR256:$src1, VR256:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>;

  // Register, memory. The pattern carries no explicit vt wrapper here.
  def Yrm : PDI<opc, MRMSrcMem, (outs VR256:$dst),
                (ins VR256:$src1, i256mem:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst,
                      (OpNode VR256:$src1,
                              (bc_frag (memopv4i64 addr:$src2))))]>;
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw,
|
||||
bc_v16i8, 0>, VEX_4V;
|
||||
@ -4156,6 +4200,48 @@ let Predicates = [HasAVX] in {
|
||||
(memopv2i64 addr:$src2))))]>, VEX_4V;
|
||||
}
|
||||
|
||||
// 256-bit integer unpack instructions; gated on HasAVX2. The byte, word and
// doubleword forms come from sse2_unpack_y; the quadword forms are written
// out by hand (see the FIXME notes below).
let Predicates = [HasAVX2] in {
  // vpunpckl{bw,wd,dq}
  defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw,
                                  bc_v32i8>, VEX_4V;
  defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd,
                                  bc_v16i16>, VEX_4V;
  defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq,
                                  bc_v8i32>, VEX_4V;

  /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
  /// knew to collapse (bitconvert VT to VT) into its operand.
  def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, (outs VR256:$dst),
                           (ins VR256:$src1, VR256:$src2),
                           "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR256:$dst,
                                 (v4i64 (X86Punpcklqdq VR256:$src1,
                                                       VR256:$src2)))]>,
                       VEX_4V;
  def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, (outs VR256:$dst),
                           (ins VR256:$src1, i256mem:$src2),
                           "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR256:$dst,
                                 (v4i64 (X86Punpcklqdq VR256:$src1,
                                            (memopv4i64 addr:$src2))))]>,
                       VEX_4V;

  // vpunpckh{bw,wd,dq}
  defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw,
                                  bc_v32i8>, VEX_4V;
  defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd,
                                  bc_v16i16>, VEX_4V;
  defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq,
                                  bc_v8i32>, VEX_4V;

  /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen
  /// knew to collapse (bitconvert VT to VT) into its operand.
  def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, (outs VR256:$dst),
                           (ins VR256:$src1, VR256:$src2),
                           "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR256:$dst,
                                 (v4i64 (X86Punpckhqdq VR256:$src1,
                                                       VR256:$src2)))]>,
                       VEX_4V;
  def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, (outs VR256:$dst),
                           (ins VR256:$src1, i256mem:$src2),
                           "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                           [(set VR256:$dst,
                                 (v4i64 (X86Punpckhqdq VR256:$src1,
                                            (memopv4i64 addr:$src2))))]>,
                       VEX_4V;
}
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>;
|
||||
defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>;
|
||||
@ -4266,6 +4352,15 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
|
||||
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
|
||||
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
|
||||
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
|
||||
|
||||
// 256-bit pmovmskb forms; gated on HasAVX2.
// NOTE(review): the mnemonic strings below carry no leading "v", exactly like
// the 128-bit VPDI definitions just above; presumably VPDI supplies it, since
// the accompanying test expects "vpmovmskb" -- confirm against the VPDI class.
let Predicates = [HasAVX2] in {
  // GR32 destination, selected from the llvm.x86.avx2.pmovmskb intrinsic.
  def VPMOVMSKBYrr   : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst),
                            (ins VR256:$src),
                            "pmovmskb\t{$src, $dst|$dst, $src}",
                            [(set GR32:$dst,
                                  (int_x86_avx2_pmovmskb VR256:$src))]>, VEX;

  // GR64 destination; has no selection pattern.
  def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst),
                            (ins VR256:$src),
                            "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
}
|
||||
|
||||
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
|
||||
"pmovmskb\t{$src, $dst|$dst, $src}",
|
||||
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
|
||||
@ -5016,6 +5111,23 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
(bitconvert (mem_frag128 addr:$src))))]>, OpSize;
|
||||
}
|
||||
|
||||
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32},
/// operating on VR256. rr256 takes the source in a register. rm256 takes it
/// from a 256-bit memory operand loaded through mem_frag256 and bitconverted
/// before being passed to IntId256.
multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr,
                              PatFrag mem_frag256, Intrinsic IntId256> {
  // Register source.
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (IntId256 VR256:$src))]>,
                    OpSize;

  // Memory source.
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                          (IntId256
                            (bitconvert (mem_frag256 addr:$src))))]>,
                    OpSize;
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
|
||||
int_x86_ssse3_pabs_b_128>, VEX;
|
||||
@ -5025,6 +5137,15 @@ let Predicates = [HasAVX] in {
|
||||
int_x86_ssse3_pabs_d_128>, VEX;
|
||||
}
|
||||
|
||||
// 256-bit absolute-value instructions; gated on HasAVX2. Each memory fragment
// has the same vector type as the operand of the intrinsic it is paired with.
let Predicates = [HasAVX2] in {
  defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8,
                                   int_x86_avx2_pabs_b>, VEX;
  defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16,
                                   int_x86_avx2_pabs_w>, VEX;
  defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32,
                                   int_x86_avx2_pabs_d>, VEX;
}
|
||||
|
||||
defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8,
|
||||
int_x86_ssse3_pabs_b_128>;
|
||||
defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16,
|
||||
@ -5055,7 +5176,23 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
(bitconvert (mem_frag128 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
|
||||
/// SS3I_binop_rm_int_y - VR256 binary operation selected from the intrinsic
/// IntId256. rr256 takes both sources in registers. rm256 takes the second
/// source from a 256-bit memory operand loaded through mem_frag256 and
/// bitconverted before being passed to the intrinsic.
multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
                               PatFrag mem_frag256, Intrinsic IntId256> {
  // Only the register-register form is marked commutable inside the
  // multiclass.
  let isCommutable = 1 in
  def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
                    (ins VR256:$src1, VR256:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>,
                    OpSize;

  // Register, memory.
  def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
                    (ins VR256:$src1, i256mem:$src2),
                    !strconcat(OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set VR256:$dst,
                          (IntId256 VR256:$src1,
                            (bitconvert (mem_frag256 addr:$src2))))]>,
                    OpSize;
}
|
||||
|
||||
let ImmT = NoImm, Predicates = [HasAVX] in {
|
||||
@ -5087,6 +5224,35 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16,
|
||||
int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
|
||||
}
|
||||
|
||||
// 256-bit forms of the SSSE3 integer binary operations; gated on HasAVX2.
// Every instantiation passes the 256-bit memory fragment whose vector type
// matches the operand type of its intrinsic (v32i8 / v16i16 / v8i32).
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDW    : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16,
                                        int_x86_avx2_phadd_w>, VEX_4V;
  defm VPHADDD    : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32,
                                        int_x86_avx2_phadd_d>, VEX_4V;
  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16,
                                        int_x86_avx2_phadd_sw>, VEX_4V;
  defm VPHSUBW    : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16,
                                        int_x86_avx2_phsub_w>, VEX_4V;
  defm VPHSUBD    : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32,
                                        int_x86_avx2_phsub_d>, VEX_4V;
  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16,
                                        int_x86_avx2_phsub_sw>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8,
                                        int_x86_avx2_pmadd_ub_sw>, VEX_4V;
  defm VPSHUFB    : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8,
                                        int_x86_avx2_pshuf_b>, VEX_4V;
  // The psign forms take 256-bit operands, so they need the 256-bit memory
  // fragments. They previously used the 128-bit memopv16i8 / memopv8i16 /
  // memopv4i32 fragments carried over from the SSSE3 definitions.
  defm VPSIGNB    : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv32i8,
                                        int_x86_avx2_psign_b>, VEX_4V;
  defm VPSIGNW    : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv16i16,
                                        int_x86_avx2_psign_w>, VEX_4V;
  defm VPSIGND    : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv8i32,
                                        int_x86_avx2_psign_d>, VEX_4V;
}
// vpmulhrsw sits outside the isCommutable = 0 scope above.
defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16,
                                        int_x86_avx2_pmul_hr_sw>, VEX_4V;
}
|
||||
|
||||
// None of these have i8 immediate fields.
|
||||
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
|
||||
let isCommutable = 0 in {
|
||||
@ -5166,8 +5332,23 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
|
||||
[]>, OpSize;
|
||||
}
|
||||
|
||||
/// ssse3_palign_y - VR256 three-operand form with an 8-bit immediate, opcode
/// 0x0F. Neither def has a selection pattern. The Is2Addr parameter is
/// accepted but not referenced by either def; both always use the
/// four-operand assembly string.
multiclass ssse3_palign_y<string asm, bit Is2Addr = 1> {
  // Register, register, immediate.
  def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, VR256:$src2, i8imm:$src3),
                     !strconcat(asm,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, OpSize;

  // Register, memory, immediate.
  def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst),
                     (ins VR256:$src1, i256mem:$src2, i8imm:$src3),
                     !strconcat(asm,
                       "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                     []>, OpSize;
}
|
||||
|
||||
let Predicates = [HasAVX] in
|
||||
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
|
||||
// 256-bit vpalignr (R256rr / R256rm from ssse3_palign_y); gated on HasAVX2.
let Predicates = [HasAVX2] in
  defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V;
|
||||
let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in
|
||||
defm PALIGN : ssse3_palign<"palignr">;
|
||||
|
||||
@ -5235,6 +5416,17 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
OpSize;
|
||||
}
|
||||
|
||||
/// SS41I_binop_rm_int16_y - Intrinsic-selected operation producing a VR256
/// result from a 128-bit source. Yrr takes the source in a VR128 register;
/// Yrm takes it from a 128-bit memory operand via a plain load.
multiclass SS41I_binop_rm_int16_y<bits<8> opc, string OpcodeStr,
                                  Intrinsic IntId> {
  // Register source.
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;

  // Memory source.
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (IntId (load addr:$src)))]>, OpSize;
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
|
||||
VEX;
|
||||
@ -5250,6 +5442,21 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
|
||||
VEX;
|
||||
}
|
||||
|
||||
// 256-bit pmovsx/pmovzx forms built from SS41I_binop_rm_int16_y (128-bit
// memory operand); gated on HasAVX2.
let Predicates = [HasAVX2] in {
  // pmovsx*
  defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw",
                                          int_x86_avx2_pmovsxbw>, VEX;
  defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd",
                                          int_x86_avx2_pmovsxwd>, VEX;
  defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq",
                                          int_x86_avx2_pmovsxdq>, VEX;
  // pmovzx*
  defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw",
                                          int_x86_avx2_pmovzxbw>, VEX;
  defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd",
                                          int_x86_avx2_pmovzxwd>, VEX;
  defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq",
                                          int_x86_avx2_pmovzxdq>, VEX;
}
|
||||
|
||||
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
|
||||
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
|
||||
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
|
||||
@ -5336,6 +5543,19 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
OpSize;
|
||||
}
|
||||
|
||||
/// SS41I_binop_rm_int8_y - Intrinsic-selected operation producing a VR256
/// result. Yrr takes the source in a VR128 register. Yrm loads 64 bits from
/// memory, places them in a v2i64 via scalar_to_vector, and bitconverts that
/// to the type IntId expects.
multiclass SS41I_binop_rm_int8_y<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId> {
  // Register source.
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;

  // Memory source. The pattern performs a 64-bit load (loadi64), so the
  // memory operand is declared i64mem to match; it was previously i32mem,
  // which understated the access size.
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i64mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                        (IntId (bitconvert
                          (v2i64 (scalar_to_vector (loadi64 addr:$src))))))]>,
                  OpSize;
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
|
||||
VEX;
|
||||
@ -5347,6 +5567,17 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
|
||||
VEX;
|
||||
}
|
||||
|
||||
// 256-bit pmovsx/pmovzx forms built from SS41I_binop_rm_int8_y; gated on
// HasAVX2.
let Predicates = [HasAVX2] in {
  // pmovsx*
  defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd",
                                         int_x86_avx2_pmovsxbd>, VEX;
  defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq",
                                         int_x86_avx2_pmovsxwq>, VEX;
  // pmovzx*
  defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd",
                                         int_x86_avx2_pmovzxbd>, VEX;
  defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq",
                                         int_x86_avx2_pmovzxwq>, VEX;
}
|
||||
|
||||
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
|
||||
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
|
||||
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
|
||||
@ -5391,12 +5622,32 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
|
||||
OpSize;
|
||||
}
|
||||
|
||||
/// SS41I_binop_rm_int4_y - Intrinsic-selected operation producing a VR256
/// result. Yrr takes the source in a VR128 register. Yrm loads 32 bits from
/// memory, places them in a v4i32 via scalar_to_vector, and bitconverts that
/// to the type IntId expects.
multiclass SS41I_binop_rm_int4_y<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId> {
  // Register source.
  def Yrr : SS48I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst, (IntId VR128:$src))]>, OpSize;

  // Memory source. The pattern performs a plain 32-bit load (loadi32), so the
  // memory operand is declared i32mem to match; it was previously i16mem,
  // which understated the access size.
  def Yrm : SS48I<opc, MRMSrcMem, (outs VR256:$dst), (ins i32mem:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR256:$dst,
                        (IntId (bitconvert
                          (v4i32 (scalar_to_vector (loadi32 addr:$src))))))]>,
                  OpSize;
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
|
||||
VEX;
|
||||
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
|
||||
VEX;
|
||||
}
|
||||
// 256-bit vpmovsxbq / vpmovzxbq built from SS41I_binop_rm_int4_y; gated on
// HasAVX2.
let Predicates = [HasAVX2] in {
  defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq",
                                         int_x86_avx2_pmovsxbq>, VEX;
  defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq",
                                         int_x86_avx2_pmovzxbq>, VEX;
}
|
||||
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
|
||||
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
|
||||
|
||||
|
@ -160,6 +160,14 @@ define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
|
||||
; CHECK: vpmovmskb
|
||||
%res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
|
||||
ret i32 %res
|
||||
}
|
||||
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpmulhw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
@ -382,3 +390,219 @@ define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pabs_b(<32 x i8> %a0) {
|
||||
; CHECK: vpabsb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8> %a0) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pabs_d(<8 x i32> %a0) {
|
||||
; CHECK: vpabsd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pabs_w(<16 x i16> %a0) {
|
||||
; CHECK: vpabsw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16> %a0) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vphaddd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vphaddsw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vphaddw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vphsubd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vphsubsw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vphsubw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vpmaddubsw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpmulhrsw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vpshufb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
|
||||
; CHECK: vpsignb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpsignd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
|
||||
; CHECK: vpsignw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
|
||||
; CHECK: vpmovsxbd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
|
||||
; CHECK: vpmovsxbq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
|
||||
; CHECK: vpmovsxbw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
|
||||
; CHECK: vpmovsxdq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
|
||||
; CHECK: vpmovsxwd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
|
||||
; CHECK: vpmovsxwq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
|
||||
; CHECK: vpmovzxbd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
|
||||
; CHECK: vpmovzxbq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
|
||||
; CHECK: vpmovzxbw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
|
||||
; CHECK: vpmovzxdq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
|
||||
; CHECK: vpmovzxwd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
|
||||
; CHECK: vpmovzxwq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user