mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-15 15:48:38 +00:00
Add more AVX2 instructions and intrinsics.
llvm-svn: 143861
This commit is contained in:
parent
7b09da38b4
commit
31b1d79474
@ -1668,22 +1668,52 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_pblendw : GCCBuiltin<"__builtin_ia32_pblendw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendd_128 : GCCBuiltin<"__builtin_ia32_pblendd128">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pblendd_256 : GCCBuiltin<"__builtin_ia32_pblendd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
|
||||
llvm_i32_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Vector broadcast (from a vector operand; vbroadcasti128 reads memory)
|
||||
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
|
||||
def int_x86_avx2_vbroadcast_ss_ps :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps">,
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_vbroadcast_sd_pd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrReadMem]>;
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd256">,
|
||||
Intrinsic<[llvm_v4f64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_vbroadcast_ss_ps_256 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrReadMem]>;
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastss_ps256">,
|
||||
Intrinsic<[llvm_v8f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_vbroadcasti128 :
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
GCCBuiltin<"__builtin_ia32_vbroadcastsi256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty], [IntrReadMem]>;
|
||||
def int_x86_avx2_pbroadcastb_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastb128">,
|
||||
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastb_256 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastb256">,
|
||||
Intrinsic<[llvm_v32i8_ty], [llvm_v16i8_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastw_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastw128">,
|
||||
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastw_256 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastw256">,
|
||||
Intrinsic<[llvm_v16i16_ty], [llvm_v8i16_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastd_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastd128">,
|
||||
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastd_256 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastd256">,
|
||||
Intrinsic<[llvm_v8i32_ty], [llvm_v4i32_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastq_128 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastq128">,
|
||||
Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
|
||||
def int_x86_avx2_pbroadcastq_256 :
|
||||
GCCBuiltin<"__builtin_ia32_pbroadcastq256">,
|
||||
Intrinsic<[llvm_v4i64_ty], [llvm_v2i64_ty], [IntrNoMem]>;
|
||||
}
|
||||
|
||||
// Misc.
|
||||
|
@ -451,6 +451,20 @@ class AVXAIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
|
||||
Requires<[HasAVX]>;
|
||||
|
||||
// AVX2 Instruction Templates:
|
||||
// Instructions introduced in AVX2 (no SSE equivalent forms)
|
||||
//
|
||||
// AVX28I - AVX2 instructions with T8 and OpSize prefix.
|
||||
// AVX2Ii8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8.
|
||||
class AVX28I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8, OpSize,
|
||||
Requires<[HasAVX2]>;
|
||||
class AVX2Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, OpSize,
|
||||
Requires<[HasAVX2]>;
|
||||
|
||||
// AES Instruction Templates:
|
||||
//
|
||||
// AES8I
|
||||
|
@ -7083,11 +7083,12 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (Int addr:$src))]>, VEX;
|
||||
|
||||
class avx_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
Intrinsic Int> :
|
||||
AVX8I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (Int VR128:$src))]>, VEX;
|
||||
// AVX2 adds register forms
|
||||
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC,
|
||||
Intrinsic Int> :
|
||||
AVX28I<opc, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set RC:$dst, (Int VR128:$src))]>, VEX;
|
||||
|
||||
def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem,
|
||||
int_x86_avx_vbroadcast_ss>;
|
||||
@ -7098,16 +7099,16 @@ def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem,
|
||||
def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem,
|
||||
int_x86_avx_vbroadcastf128_pd_256>;
|
||||
|
||||
let Predicates = [HasAVX2] in {
|
||||
def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128,
|
||||
int_x86_avx2_vbroadcast_ss_ps>;
|
||||
def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256,
|
||||
int_x86_avx2_vbroadcast_ss_ps_256>;
|
||||
def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256,
|
||||
int_x86_avx2_vbroadcast_sd_pd_256>;
|
||||
|
||||
let Predicates = [HasAVX2] in
|
||||
def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem,
|
||||
int_x86_avx2_vbroadcasti128>;
|
||||
def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128,
|
||||
int_x86_avx2_vbroadcast_ss_ps>;
|
||||
def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256,
|
||||
int_x86_avx2_vbroadcast_ss_ps_256>;
|
||||
def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256,
|
||||
int_x86_avx2_vbroadcast_sd_pd_256>;
|
||||
}
|
||||
|
||||
def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
|
||||
(VBROADCASTF128 addr:$src)>;
|
||||
@ -7364,7 +7365,7 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Half precision conversion instructions
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
|
||||
let Predicates = [HasAVX, HasF16C] in {
|
||||
def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
|
||||
@ -7396,3 +7397,71 @@ defm VCVTPH2PS : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
|
||||
defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>;
|
||||
defm VCVTPS2PH : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
|
||||
defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AVX2 Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// AVX2I_binop_rmi_int - AVX2 binary operator with 8-bit immediate
|
||||
multiclass AVX2I_binop_rmi_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId, RegisterClass RC, PatFrag memop_frag,
|
||||
X86MemOperand x86memop> {
|
||||
let isCommutable = 1 in
|
||||
def rri : AVX2Ii8<opc, MRMSrcReg, (outs RC:$dst),
|
||||
(ins RC:$src1, RC:$src2, u32u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))]>,
|
||||
VEX_4V;
|
||||
def rmi : AVX2Ii8<opc, MRMSrcMem, (outs RC:$dst),
|
||||
(ins RC:$src1, x86memop:$src2, u32u8imm:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set RC:$dst,
|
||||
(IntId RC:$src1,
|
||||
(bitconvert (memop_frag addr:$src2)), imm:$src3))]>,
|
||||
VEX_4V;
|
||||
}
|
||||
|
||||
let isCommutable = 0 in {
|
||||
defm VPBLENDD : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128,
|
||||
VR128, memopv16i8, i128mem>;
|
||||
defm VPBLENDDY : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256,
|
||||
VR256, memopv32i8, i256mem>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPBROADCAST - Broadcast a register or memory source to all elements of the
|
||||
// destination operand
|
||||
//
|
||||
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
|
||||
X86MemOperand x86memop, PatFrag ld_frag,
|
||||
Intrinsic Int128, Intrinsic Int256> {
|
||||
def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst, (Int128 VR128:$src))]>, VEX;
|
||||
def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR128:$dst,
|
||||
(Int128 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
|
||||
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst, (Int256 VR128:$src))]>, VEX;
|
||||
def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
|
||||
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
|
||||
[(set VR256:$dst,
|
||||
(Int256 (scalar_to_vector (ld_frag addr:$src))))]>, VEX;
|
||||
}
|
||||
|
||||
defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
|
||||
int_x86_avx2_pbroadcastb_128,
|
||||
int_x86_avx2_pbroadcastb_256>;
|
||||
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
|
||||
int_x86_avx2_pbroadcastw_128,
|
||||
int_x86_avx2_pbroadcastw_256>;
|
||||
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
|
||||
int_x86_avx2_pbroadcastd_128,
|
||||
int_x86_avx2_pbroadcastd_256>;
|
||||
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
|
||||
int_x86_avx2_pbroadcastq_128,
|
||||
int_x86_avx2_pbroadcastq_256>;
|
||||
|
@ -766,3 +766,83 @@ define <8 x float> @test_x86_avx2_vbroadcast_ss_ps_256(<4 x float> %a0) {
|
||||
ret <8 x float> %res
|
||||
}
|
||||
; The register-source broadcast intrinsic is IntrNoMem, hence readnone rather than readonly.
declare <8 x float> @llvm.x86.avx2.vbroadcast.ss.ps.256(<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
|
||||
; CHECK: vpblendd
|
||||
%res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i32 7) ; <<4 x i32>> [#uses=1]
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
|
||||
; CHECK: vpblendd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i32 7) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i32) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i8> @test_x86_avx2_pbroadcastb_128(<16 x i8> %a0) {
|
||||
; CHECK: vpbroadcastb
|
||||
%res = call <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1]
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastb_128 is IntrNoMem, hence readnone rather than readonly.
declare <16 x i8> @llvm.x86.avx2.pbroadcastb.128(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <32 x i8> @test_x86_avx2_pbroadcastb_256(<16 x i8> %a0) {
|
||||
; CHECK: vpbroadcastb
|
||||
%res = call <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8> %a0) ; <<32 x i8>> [#uses=1]
|
||||
ret <32 x i8> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastb_256 is IntrNoMem, hence readnone rather than readonly.
declare <32 x i8> @llvm.x86.avx2.pbroadcastb.256(<16 x i8>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i16> @test_x86_avx2_pbroadcastw_128(<8 x i16> %a0) {
|
||||
; CHECK: vpbroadcastw
|
||||
%res = call <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastw_128 is IntrNoMem, hence readnone rather than readonly.
declare <8 x i16> @llvm.x86.avx2.pbroadcastw.128(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <16 x i16> @test_x86_avx2_pbroadcastw_256(<8 x i16> %a0) {
|
||||
; CHECK: vpbroadcastw
|
||||
%res = call <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16> %a0) ; <<16 x i16>> [#uses=1]
|
||||
ret <16 x i16> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastw_256 is IntrNoMem, hence readnone rather than readonly.
declare <16 x i16> @llvm.x86.avx2.pbroadcastw.256(<8 x i16>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i32> @test_x86_avx2_pbroadcastd_128(<4 x i32> %a0) {
|
||||
; CHECK: vpbroadcastd
|
||||
%res = call <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1]
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastd_128 is IntrNoMem, hence readnone rather than readonly.
declare <4 x i32> @llvm.x86.avx2.pbroadcastd.128(<4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <8 x i32> @test_x86_avx2_pbroadcastd_256(<4 x i32> %a0) {
|
||||
; CHECK: vpbroadcastd
|
||||
%res = call <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32> %a0) ; <<8 x i32>> [#uses=1]
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastd_256 is IntrNoMem, hence readnone rather than readonly.
declare <8 x i32> @llvm.x86.avx2.pbroadcastd.256(<4 x i32>) nounwind readnone
|
||||
|
||||
|
||||
define <2 x i64> @test_x86_avx2_pbroadcastq_128(<2 x i64> %a0) {
|
||||
; CHECK: vpbroadcastq
|
||||
%res = call <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64> %a0) ; <<2 x i64>> [#uses=1]
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastq_128 is IntrNoMem, hence readnone rather than readonly.
declare <2 x i64> @llvm.x86.avx2.pbroadcastq.128(<2 x i64>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
|
||||
; CHECK: vpbroadcastq
|
||||
%res = call <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64> %a0) ; <<4 x i64>> [#uses=1]
|
||||
ret <4 x i64> %res
|
||||
}
|
||||
; int_x86_avx2_pbroadcastq_256 is IntrNoMem, hence readnone rather than readonly.
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readnone
|
||||
|
Loading…
Reference in New Issue
Block a user