[X86][AVX512] Add missing vmovss/vmovsd encodings

Differential Revision: http://reviews.llvm.org/D14701



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254875 91177308-0d34-0410-b5e6-96231b3b80d8
Asaf Badouh 2015-12-06 13:26:56 +00:00
parent f7fc15ed79
commit 023610af4f
6 changed files with 245 additions and 39 deletions


@@ -1840,6 +1840,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
[IntrReadArgMem]>;
def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
[IntrNoMem]>;
}
// Conditional store ops
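
For reference, a minimal IR-level sketch of calling the new merge-masking intrinsic defined above; the function name is illustrative, while the declaration and operand order (src1, src2, pass-through, mask) match the CodeGen tests added further down in this patch:

declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float> @sketch_mask_move_ss(<4 x float> %a, <4 x float> %b, <4 x float> %passthru, i8 %mask) {
  ; Merge masking: with a real pass-through vector, isel is expected to pick the
  ; merge-masked vmovss register form introduced by this patch (vmovss ... {%k1}).
  %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %a, <4 x float> %b, <4 x float> %passthru, i8 %mask)
  ret <4 x float> %res
}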


@@ -2973,53 +2973,60 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
multiclass avx512_move_scalar <string asm, RegisterClass RC,
SDNode OpNode, ValueType vt,
X86MemOperand x86memop, PatFrag mem_pat> {
let hasSideEffects = 0 in {
def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128X:$dst, (vt (OpNode VR128X:$src1,
(scalar_to_vector RC:$src2))))],
IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in
def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3),
!strconcat(asm,
"\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"),
[], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K;
def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>,
EVEX, VEX_LIG;
multiclass avx512_move_scalar <string asm, SDNode OpNode,
X86VectorVTInfo _> {
defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
asm, "$src2, $src1","$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2))),
IIC_SSE_MOV_S_RR>, EVEX_4V;
let Constraints = "$src1 = $dst" , mayLoad = 1 in
defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
(outs _.RC:$dst),
(ins _.ScalarMemOp:$src),
asm,"$src","$src",
(_.VT (OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src)))))>, EVEX;
let isCodeGenOnly = 1 in {
def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.FRC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
(scalar_to_vector _.FRC:$src2))))],
_.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
let mayLoad = 1 in
def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
_.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
}
let mayStore = 1 in {
def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR>,
EVEX, VEX_LIG;
def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], IIC_SSE_MOV_S_MR>,
EVEX, VEX_LIG, EVEX_K;
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
EVEX;
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
[], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K;
} // mayStore
} //hasSideEffects = 0
}
let ExeDomain = SSEPackedSingle in
defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem,
loadf32>, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
let ExeDomain = SSEPackedDouble in
defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem,
loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
(COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
(COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
(COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
(COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
(VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)),
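
To complement the masked register forms defined above, a hedged zero-masking sketch at the IR level (illustrative function name): passing zeroinitializer as the pass-through operand is expected to select the {z} register variant, e.g. vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}, which the rrkz tests below check for.

declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double> @sketch_maskz_move_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
  ; Zero masking: the zeroinitializer pass-through requests the {z} form.
  %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask)
  ret <2 x double> %res
}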


@@ -143,7 +143,7 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
@@ -807,6 +807,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::MOVDDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK,
X86ISD::MOVDDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
X86ISD::MOVSD, 0),
X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
X86ISD::MOVSS, 0),
X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK,
X86ISD::MOVSHDUP, 0),
X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK,


@@ -6234,3 +6234,48 @@ define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) {
}
declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
define <4 x float>@test_int_x86_avx512_mask_move_ss_rrk(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrk:
; CHECK: vmovss %xmm1, %xmm0, %xmm2 {%k1}
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
ret <4 x float> %res
}
define <4 x float>@test_int_x86_avx512_mask_move_ss_rrkz(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rrkz:
; CHECK: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 %x2)
ret <4 x float> %res
}
define <4 x float>@test_int_x86_avx512_mask_move_ss_rr(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_ss_rr:
; CHECK: vmovss %xmm1, %xmm0, %xmm0
%res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> zeroinitializer, i8 -1)
ret <4 x float> %res
}
declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_move_sd_rr(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rr:
; CHECK: vmovsd %xmm1, %xmm0, %xmm0
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 -1)
ret <2 x double> %res
}
define <2 x double>@test_int_x86_avx512_mask_move_sd_rrkz(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrkz:
; CHECK: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> zeroinitializer, i8 %x2)
ret <2 x double> %res
}
define <2 x double>@test_int_x86_avx512_mask_move_sd_rrk(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_move_sd_rrk:
; CHECK: vmovsd %xmm1, %xmm0, %xmm2 {%k1}
%res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
ret <2 x double> %res
}


@@ -19220,3 +19220,58 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: vucomiss -516(%rdx), %xmm22
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2e,0xb2,0xfc,0xfd,0xff,0xff]
vucomiss -516(%rdx), %xmm22
// CHECK: vmovsd (%rcx), %xmm25 {%k3}
// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
vmovsd (%rcx), %xmm25 {%k3}
// CHECK: vmovsd (%rcx), %xmm25 {%k3} {z}
// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
vmovsd (%rcx), %xmm25 {%k3} {z}
// CHECK: vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
// CHECK: encoding: [0x62,0x21,0xe7,0x8b,0x10,0xdb]
vmovsd %xmm19, %xmm3, %xmm27 {%k3} {z}
// CHECK: vmovss (%rcx), %xmm2 {%k4}
// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
vmovss (%rcx), %xmm2 {%k4}
// CHECK: vmovss (%rcx), %xmm2 {%k4} {z}
// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
vmovss (%rcx), %xmm2 {%k4} {z}
// CHECK: vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
// CHECK: encoding: [0x62,0x01,0x36,0x8c,0x10,0xe2]
vmovss %xmm26, %xmm9, %xmm28 {%k4} {z}
// CHECK: vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
// CHECK: encoding: [0x62,0xc1,0xcf,0x87,0x10,0xef]
vmovsd %xmm15, %xmm22, %xmm21 {%k7} {z}
// CHECK: vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
// CHECK: encoding: [0x62,0xd1,0x97,0x8d,0x10,0xd8]
vmovsd %xmm8, %xmm13, %xmm3 {%k5} {z}
// CHECK: vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
// CHECK: encoding: [0x62,0xe1,0x26,0x82,0x10,0xca]
vmovss %xmm2, %xmm27, %xmm17 {%k2} {z}
// CHECK: vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
// CHECK: encoding: [0x62,0x31,0x66,0x83,0x10,0xd7]
vmovss %xmm23, %xmm19, %xmm10 {%k3} {z}
// CHECK: vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
// CHECK: encoding: [0x62,0xf1,0x87,0x8e,0x10,0xe4]
vmovsd %xmm4, %xmm15, %xmm4 {%k6} {z}
// CHECK: vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
// CHECK: encoding: [0x62,0xc1,0xef,0x8f,0x10,0xe6]
vmovsd %xmm14, %xmm2, %xmm20 {%k7} {z}
// CHECK: vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
// CHECK: encoding: [0x62,0xa1,0x26,0x8b,0x10,0xeb]
vmovss %xmm19, %xmm11, %xmm21 {%k3} {z}
// CHECK: vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}
// CHECK: encoding: [0x62,0x11,0x26,0x82,0x10,0xf8]
vmovss %xmm24, %xmm27, %xmm15 {%k2} {z}


@@ -264,3 +264,91 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: vcomiss xmm16, dword ptr [rcx]
// CHECK: encoding: [0x62,0xe1,0x7c,0x08,0x2f,0x01]
vcomiss xmm16, DWORD PTR [rcx]
// CHECK: vmovss dword ptr [rcx] {k2}, xmm13
// CHECK: encoding: [0x62,0x71,0x7e,0x0a,0x11,0x29]
vmovss dword ptr [rcx]{k2},xmm13
// CHECK: vmovss dword ptr [rax + 8*r14 + 4660], xmm13
// CHECK: encoding: [0xc4,0x21,0x7a,0x11,0xac,0xf0,0x34,0x12,0x00,0x00]
vmovss dword ptr [rax+r14*8+0x1234],xmm13
// CHECK: vmovss dword ptr [rdx + 508], xmm13
// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0x01,0x00,0x00]
vmovss dword ptr [rdx+0x1fc],xmm13
// CHECK: vmovss dword ptr [rdx + 512], xmm13
// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0x02,0x00,0x00]
vmovss dword ptr [rdx+0x200],xmm13
// CHECK: vmovss dword ptr [rdx - 512], xmm13
// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0x00,0xfe,0xff,0xff]
vmovss dword ptr [rdx-0x200],xmm13
// CHECK: vmovss dword ptr [rdx - 516], xmm13
// CHECK: encoding: [0xc5,0x7a,0x11,0xaa,0xfc,0xfd,0xff,0xff]
vmovss dword ptr [rdx-0x204],xmm13
// CHECK: vmovss dword ptr [rdx + 508], xmm5
// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0x01,0x00,0x00]
vmovss dword ptr [rdx+0x1fc],xmm5
// CHECK: vmovss dword ptr [rdx + 512], xmm5
// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0x02,0x00,0x00]
vmovss dword ptr [rdx+0x200],xmm5
// CHECK: vmovss dword ptr [rdx - 512], xmm5
// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0x00,0xfe,0xff,0xff]
vmovss dword ptr [rdx-0x200], xmm5
// CHECK: vmovss dword ptr [rdx - 516], xmm5
// CHECK: encoding: [0xc5,0xfa,0x11,0xaa,0xfc,0xfd,0xff,0xff]
vmovss dword ptr [rdx-0x204],xmm5
// CHECK: vmovss dword ptr [rcx], xmm13
// CHECK: encoding: [0xc5,0x7a,0x11,0x29]
vmovss dword ptr [rcx],xmm13
// CHECK: vmovss xmm2, dword ptr [rcx]
// CHECK: encoding: [0xc5,0xfa,0x10,0x11]
vmovss xmm2, dword ptr [rcx]
// CHECK: vmovss xmm2 {k4}, dword ptr [rcx]
// CHECK: encoding: [0x62,0xf1,0x7e,0x0c,0x10,0x11]
vmovss xmm2{k4}, dword ptr [rcx]
// CHECK: vmovss xmm2 {k4} {z}, dword ptr [rcx]
// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
vmovss xmm2{k4} {z}, dword ptr [rcx]
// CHECK: vmovsd xmm25 , qword ptr [rcx]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x09]
vmovsd xmm25, qword ptr [rcx]
// CHECK: vmovsd xmm25 {k3}, qword ptr [rcx]
// CHECK: encoding: [0x62,0x61,0xff,0x0b,0x10,0x09]
vmovsd xmm25{k3}, qword ptr [rcx]
// CHECK: vmovsd xmm25 {k3} {z}, qword ptr [rcx]
// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
vmovsd xmm25{k3} {z}, qword ptr [rcx]
// CHECK: vmovsd xmm25 , qword ptr [rax + 8*r14 + 291]
// CHECK: encoding: [0x62,0x21,0xff,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
vmovsd xmm25, qword ptr [rax+r14*8+0x123]
// CHECK: vmovsd xmm25 , qword ptr [rdx + 1016]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x7f]
vmovsd xmm25, qword ptr [rdx+0x3f8]
// CHECK: vmovsd xmm25 , qword ptr [rdx + 1024]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
vmovsd xmm25, qword ptr [rdx+0x400]
// CHECK: vmovsd xmm25 , qword ptr [rdx - 1024]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x80]
vmovsd xmm25, qword ptr [rdx-0x400]
// CHECK: vmovsd xmm25 , qword ptr [rdx - 1032]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
vmovsd xmm25, qword ptr [rdx-0x408]