mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-30 23:20:54 +00:00
[X86][AVX512] Add support for v2i64 fptosi/fptoui/sitofp/uitofp on AVX512DQ-only targets
Use 512-bit instructions with subvector insertion/extraction like we do in a number of similar circumstances.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287882 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
1a88fd21d5
commit
5d31f856ab
@ -1251,18 +1251,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
|
||||
if (Subtarget.hasDQI()) {
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
|
||||
if (Subtarget.hasVLX()) {
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
|
||||
setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
|
||||
|
||||
if (Subtarget.hasVLX()) {
|
||||
// Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
|
||||
setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
|
||||
}
|
||||
|
@ -6581,6 +6581,11 @@ let Predicates = [HasDQI, HasVLX] in {
|
||||
}
|
||||
|
||||
let Predicates = [HasDQI, NoVLX] in {
|
||||
def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
|
||||
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR128X:$src1, sub_xmm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
|
||||
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
@ -6591,6 +6596,11 @@ def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
|
||||
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR256X:$src1, sub_ymm)))), sub_ymm)>;
|
||||
|
||||
def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
|
||||
(v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR128X:$src1, sub_xmm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
|
||||
(v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
@ -6606,6 +6616,11 @@ def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR256X:$src1, sub_ymm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR128X:$src1, sub_xmm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
|
||||
(EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
@ -6616,6 +6631,11 @@ def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR256X:$src1, sub_ymm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
|
||||
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
VR128X:$src1, sub_xmm)))), sub_xmm)>;
|
||||
|
||||
def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
|
||||
(EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
|
||||
(v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
|
||||
|
@ -716,6 +716,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
||||
// potential massive combinations (elem_num x src_type x dst_type).
|
||||
|
||||
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
|
||||
{ ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
|
||||
{ ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
|
||||
@ -728,8 +730,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
|
||||
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
|
||||
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
|
||||
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
|
||||
{ ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 },
|
||||
|
||||
|
@ -17,7 +17,8 @@ define i32 @fptosi_double_i64(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
|
||||
%V2I64 = fptosi <2 x double> undef to <2 x i64>
|
||||
; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
|
||||
; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
|
||||
@ -79,7 +80,8 @@ define i32 @fptosi_double_i16(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I16 = fptosi
|
||||
; AVX1: cost of 6 {{.*}} %V2I16 = fptosi
|
||||
; AVX2: cost of 6 {{.*}} %V2I16 = fptosi
|
||||
; AVX512: cost of 6 {{.*}} %V2I16 = fptosi
|
||||
; AVX512F: cost of 6 {{.*}} %V2I16 = fptosi
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptosi
|
||||
%V2I16 = fptosi <2 x double> undef to <2 x i16>
|
||||
; SSE2: cost of 13 {{.*}} %V4I16 = fptosi
|
||||
; SSE42: cost of 13 {{.*}} %V4I16 = fptosi
|
||||
@ -109,7 +111,8 @@ define i32 @fptosi_double_i8(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I8 = fptosi
|
||||
; AVX1: cost of 6 {{.*}} %V2I8 = fptosi
|
||||
; AVX2: cost of 6 {{.*}} %V2I8 = fptosi
|
||||
; AVX512: cost of 6 {{.*}} %V2I8 = fptosi
|
||||
; AVX512F: cost of 6 {{.*}} %V2I8 = fptosi
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptosi
|
||||
%V2I8 = fptosi <2 x double> undef to <2 x i8>
|
||||
; SSE2: cost of 13 {{.*}} %V4I8 = fptosi
|
||||
; SSE42: cost of 13 {{.*}} %V4I8 = fptosi
|
||||
@ -140,7 +143,7 @@ define i32 @fptosi_float_i64(i32 %arg) {
|
||||
; AVX1: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX2: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512DQ: cost of 6 {{.*}} %V2I64 = fptosi
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi
|
||||
%V2I64 = fptosi <2 x float> undef to <2 x i64>
|
||||
; SSE2: cost of 13 {{.*}} %V4I64 = fptosi
|
||||
; SSE42: cost of 13 {{.*}} %V4I64 = fptosi
|
||||
|
@ -50,7 +50,8 @@ define i32 @fptoui_double_i32(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I32 = fptoui
|
||||
; AVX1: cost of 6 {{.*}} %V2I32 = fptoui
|
||||
; AVX2: cost of 6 {{.*}} %V2I32 = fptoui
|
||||
; AVX512: cost of 6 {{.*}} %V2I32 = fptoui
|
||||
; AVX512F: cost of 6 {{.*}} %V2I32 = fptoui
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I32 = fptoui
|
||||
%V2I32 = fptoui <2 x double> undef to <2 x i32>
|
||||
; SSE2: cost of 13 {{.*}} %V4I32 = fptoui
|
||||
; SSE42: cost of 13 {{.*}} %V4I32 = fptoui
|
||||
@ -80,7 +81,8 @@ define i32 @fptoui_double_i16(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I16 = fptoui
|
||||
; AVX1: cost of 6 {{.*}} %V2I16 = fptoui
|
||||
; AVX2: cost of 6 {{.*}} %V2I16 = fptoui
|
||||
; AVX512: cost of 6 {{.*}} %V2I16 = fptoui
|
||||
; AVX512F: cost of 6 {{.*}} %V2I16 = fptoui
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptoui
|
||||
%V2I16 = fptoui <2 x double> undef to <2 x i16>
|
||||
; SSE2: cost of 13 {{.*}} %V4I16 = fptoui
|
||||
; SSE42: cost of 13 {{.*}} %V4I16 = fptoui
|
||||
@ -110,7 +112,8 @@ define i32 @fptoui_double_i8(i32 %arg) {
|
||||
; SSE42: cost of 6 {{.*}} %V2I8 = fptoui
|
||||
; AVX1: cost of 6 {{.*}} %V2I8 = fptoui
|
||||
; AVX2: cost of 6 {{.*}} %V2I8 = fptoui
|
||||
; AVX512: cost of 6 {{.*}} %V2I8 = fptoui
|
||||
; AVX512F: cost of 6 {{.*}} %V2I8 = fptoui
|
||||
; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptoui
|
||||
%V2I8 = fptoui <2 x double> undef to <2 x i8>
|
||||
; SSE2: cost of 13 {{.*}} %V4I8 = fptoui
|
||||
; SSE42: cost of 13 {{.*}} %V4I8 = fptoui
|
||||
|
@ -103,7 +103,7 @@ define i32 @sitofp_i64_double() {
|
||||
; AVX1: cost of 20 {{.*}} sitofp <2 x i64>
|
||||
; AVX2: cost of 20 {{.*}} sitofp <2 x i64>
|
||||
; AVX512F: cost of 20 {{.*}} sitofp <2 x i64>
|
||||
; AVX512DQ: cost of 20 {{.*}} sitofp <2 x i64>
|
||||
; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64>
|
||||
%cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double>
|
||||
|
||||
; SSE2: cost of 40 {{.*}} sitofp <4 x i64>
|
||||
@ -222,7 +222,7 @@ define i32 @sitofp_i64_float() {
|
||||
; AVX1: cost of 4 {{.*}} sitofp <2 x i64>
|
||||
; AVX2: cost of 4 {{.*}} sitofp <2 x i64>
|
||||
; AVX512F: cost of 4 {{.*}} sitofp <2 x i64>
|
||||
; AVX512DQ: cost of 4 {{.*}} sitofp <2 x i64>
|
||||
; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64>
|
||||
%cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float>
|
||||
|
||||
; SSE2: cost of 30 {{.*}} sitofp <4 x i64>
|
||||
|
@ -60,12 +60,9 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: fptosi_2f64_to_2i64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64:
|
||||
@ -334,12 +331,9 @@ define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: fptoui_2f64_to_2i64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm1
|
||||
; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
|
||||
; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vmovq %rax, %xmm0
|
||||
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64:
|
||||
|
@ -58,11 +58,9 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
|
||||
; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64:
|
||||
@ -497,11 +495,9 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: uitofp_2i64_to_2f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
|
||||
; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
|
||||
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64:
|
||||
@ -2643,12 +2639,9 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0
|
||||
; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
|
||||
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64:
|
||||
@ -2965,12 +2958,9 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
|
||||
;
|
||||
; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64:
|
||||
; AVX512DQ: # BB#0:
|
||||
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
|
||||
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1
|
||||
; AVX512DQ-NEXT: vmovq %xmm0, %rax
|
||||
; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0
|
||||
; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
|
||||
; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
|
||||
; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64:
|
||||
|
@ -20,14 +20,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
;
|
||||
|
||||
define void @sitofp_2i64_2f64() #0 {
|
||||
; CHECK-LABEL: @sitofp_2i64_2f64(
|
||||
; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
||||
; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
||||
; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; CHECK-NEXT: ret void
|
||||
; SSE-LABEL: @sitofp_2i64_2f64(
|
||||
; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
||||
; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
||||
; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; SSE-NEXT: ret void
|
||||
;
|
||||
; AVX256-LABEL: @sitofp_2i64_2f64(
|
||||
; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
; AVX256-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double
|
||||
; AVX256-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double
|
||||
; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64
|
||||
; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
|
||||
; AVX256-NEXT: ret void
|
||||
;
|
||||
; AVX512-LABEL: @sitofp_2i64_2f64(
|
||||
; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64
|
||||
; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double>
|
||||
; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64
|
||||
; AVX512-NEXT: ret void
|
||||
;
|
||||
%ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64
|
||||
%ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8
|
||||
|
Loading…
Reference in New Issue
Block a user