mirror of
https://github.com/RPCSX/llvm.git
synced 2025-03-04 19:07:26 +00:00
AMDGPU: Make v2i64/v2f64 legal types.
They can be loaded and stored, so count them as legal. This is mostly to fix a number of common cases for load/store merging.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254086 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
824e14ddab
commit
b617c550dc
@ -52,6 +52,9 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
|
||||
|
||||
addRegisterClass(MVT::v2i64, &AMDGPU::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v2f64, &AMDGPU::SReg_128RegClass);
|
||||
|
||||
addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
|
||||
|
||||
@ -156,13 +159,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
for (MVT VT : MVT::fp_valuetypes())
|
||||
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
|
||||
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
|
||||
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
|
||||
|
||||
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
|
||||
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
|
||||
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Expand);
|
||||
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
|
||||
|
||||
|
||||
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
|
||||
|
||||
setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);
|
||||
setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::i1, Custom);
|
||||
|
||||
setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::STORE, MVT::v2i64, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
|
||||
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
|
||||
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
|
||||
@ -174,9 +194,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
|
||||
setOperationAction(ISD::SELECT, MVT::i1, Promote);
|
||||
|
||||
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
|
||||
|
||||
|
||||
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
|
||||
|
||||
// We only support LOAD/STORE and vector manipulation ops for vectors
|
||||
// with > 4 elements, and for the 64-bit element vectors v2i64 and v2f64.
|
||||
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32}) {
|
||||
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
|
||||
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
|
||||
switch(Op) {
|
||||
case ISD::LOAD:
|
||||
@ -187,6 +212,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
case ISD::INSERT_SUBVECTOR:
|
||||
case ISD::EXTRACT_SUBVECTOR:
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
break;
|
||||
case ISD::CONCAT_VECTORS:
|
||||
setOperationAction(Op, VT, Custom);
|
||||
@ -198,6 +224,22 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
|
||||
}
|
||||
}
|
||||
|
||||
// Most operations are naturally 32-bit vector operations. We only support
|
||||
// load and store of 64-bit element vectors, so promote v2i64 and v2f64 vector operations to v4i32.
|
||||
for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
|
||||
setOperationAction(ISD::BUILD_VECTOR, Vec64, Promote);
|
||||
AddPromotedToType(ISD::BUILD_VECTOR, Vec64, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec64, Promote);
|
||||
AddPromotedToType(ISD::EXTRACT_VECTOR_ELT, Vec64, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::INSERT_VECTOR_ELT, Vec64, Promote);
|
||||
AddPromotedToType(ISD::INSERT_VECTOR_ELT, Vec64, MVT::v4i32);
|
||||
|
||||
setOperationAction(ISD::SCALAR_TO_VECTOR, Vec64, Promote);
|
||||
AddPromotedToType(ISD::SCALAR_TO_VECTOR, Vec64, MVT::v4i32);
|
||||
}
|
||||
|
||||
if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
|
||||
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
|
||||
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
|
||||
|
@ -2501,6 +2501,11 @@ def : Pat <
|
||||
/********** Extraction, Insertion, Building and Casting **********/
|
||||
/********** ============================================ **********/
|
||||
|
||||
//def : Extract_Element<i64, v2i64, 0, sub0_sub1>;
|
||||
//def : Extract_Element<i64, v2i64, 1, sub2_sub3>;
|
||||
//def : Extract_Element<f64, v2f64, 0, sub0_sub1>;
|
||||
//def : Extract_Element<f64, v2f64, 1, sub2_sub3>;
|
||||
|
||||
foreach Index = 0-2 in {
|
||||
def Extract_Element_v2i32_#Index : Extract_Element <
|
||||
i32, v2i32, Index, !cast<SubRegIndex>(sub#Index)
|
||||
@ -2586,6 +2591,16 @@ def : BitConvert <f64, v2i32, VReg_64>;
|
||||
def : BitConvert <v4f32, v4i32, VReg_128>;
|
||||
def : BitConvert <v4i32, v4f32, VReg_128>;
|
||||
|
||||
|
||||
def : BitConvert <v2i64, v4i32, SReg_128>;
|
||||
def : BitConvert <v4i32, v2i64, SReg_128>;
|
||||
|
||||
def : BitConvert <v2f64, v4i32, VReg_128>;
|
||||
def : BitConvert <v4i32, v2f64, VReg_128>;
|
||||
|
||||
|
||||
|
||||
|
||||
def : BitConvert <v8f32, v8i32, SReg_256>;
|
||||
def : BitConvert <v8i32, v8f32, SReg_256>;
|
||||
def : BitConvert <v8i32, v32i8, SReg_256>;
|
||||
|
@ -193,7 +193,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
|
||||
(add SGPR_64, VCC, EXEC, FLAT_SCR)
|
||||
>;
|
||||
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 32, (add SGPR_128)> {
|
||||
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
|
||||
// Requires 2 s_mov_b64 to copy
|
||||
let CopyCost = 2;
|
||||
}
|
||||
@ -221,7 +221,7 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
|
||||
let CopyCost = 3;
|
||||
}
|
||||
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32], 32, (add VGPR_128)> {
|
||||
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
|
||||
// Requires 4 v_mov_b32 to copy
|
||||
let CopyCost = 4;
|
||||
}
|
||||
|
@ -61,15 +61,11 @@ define void @simple_read2_v4f32_superreg_align4(float addrspace(1)* %out) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; FIXME: the v_lshl_b64 x, x, 32 is a bad way of doing a copy
|
||||
|
||||
; CI-LABEL: {{^}}simple_read2_v3f32_superreg_align4:
|
||||
; CI-DAG: ds_read2_b32 v{{\[}}[[REG_X:[0-9]+]]:[[REG_Y:[0-9]+]]{{\]}}, v{{[0-9]+}} offset1:1{{$}}
|
||||
; CI-DAG: ds_read_b32 v[[REG_Z:[0-9]+]], v{{[0-9]+}} offset:8{{$}}
|
||||
; CI: v_lshr_b64 v{{\[}}[[Y_COPY:[0-9]+]]:{{[0-9]+\]}}, v{{\[}}[[REG_X]]:[[REG_Y]]{{\]}}, 32
|
||||
; CI-DAG: v_add_f32_e32 v[[ADD0:[0-9]+]], v[[REG_Z]], v[[REG_X]]
|
||||
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[Y_COPY]], v[[ADD0]]
|
||||
; CI-DAG: v_add_f32_e32 v[[ADD1:[0-9]+]], v[[REG_Y]], v[[ADD0]]
|
||||
; CI: buffer_store_dword v[[ADD1]]
|
||||
; CI: s_endpgm
|
||||
define void @simple_read2_v3f32_superreg_align4(float addrspace(1)* %out) #0 {
|
||||
|
@ -345,8 +345,9 @@ define void @store_constant_disjoint_offsets() {
|
||||
|
||||
; SI-LABEL: @store_misaligned64_constant_offsets
|
||||
; SI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
|
||||
; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset1:1
|
||||
; SI-DAG: ds_write2_b32 [[ZERO]], v{{[0-9]+}}, v{{[0-9]+}} offset0:2 offset1:3
|
||||
; SI: s_endpgm
|
||||
define void @store_misaligned64_constant_offsets() {
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 0), align 4
|
||||
store i64 123, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @bar, i32 0, i32 1), align 4
|
||||
|
@ -17,3 +17,27 @@ define void @extract_vector_elt_select_error(i32 addrspace(1)* %out, i64 addrspa
|
||||
store volatile i64 %val, i64 addrspace(1)* %in
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
; Extracts both elements of the <2 x i64> argument %foo using constant
; indices and writes them out swapped: element 1 is stored to %out and
; element 0 to the following i64 slot (%out + 1). Both stores are volatile.
define void @extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo) nounwind {
|
||||
%p0 = extractelement <2 x i64> %foo, i32 0
|
||||
%p1 = extractelement <2 x i64> %foo, i32 1
|
||||
%out1 = getelementptr i64, i64 addrspace(1)* %out, i32 1
|
||||
store volatile i64 %p1, i64 addrspace(1)* %out
|
||||
store volatile i64 %p0, i64 addrspace(1)* %out1
|
||||
ret void
|
||||
}
|
||||
|
||||
; Extracts one element of the <2 x i64> argument %foo at the run-time index
; %elt (not a constant) and writes it to %out with a volatile store.
define void @dyn_extract_vector_elt_v2i64(i64 addrspace(1)* %out, <2 x i64> %foo, i32 %elt) nounwind {
|
||||
%dynelt = extractelement <2 x i64> %foo, i32 %elt
|
||||
store volatile i64 %dynelt, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Variant of the dynamic-index extract where the vector is computed rather
; than passed directly: a <2 x i64> is loaded (volatile) from %foo, OR-ed
; with the argument vector %arst, and the element at run-time index %elt of
; the result is written to %out with a volatile store.
define void @dyn_extract_vector_elt_v2i64_2(i64 addrspace(1)* %out, <2 x i64> addrspace(1)* %foo, i32 %elt, <2 x i64> %arst) nounwind {
|
||||
%load = load volatile <2 x i64>, <2 x i64> addrspace(1)* %foo
|
||||
%or = or <2 x i64> %load, %arst
|
||||
%dynelt = extractelement <2 x i64> %or, i32 %elt
|
||||
store volatile i64 %dynelt, i64 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -23,8 +23,7 @@ define void @s_fadd_f64(double addrspace(1)* %out, double %r0, double %r1) {
|
||||
; CHECK-LABEL: {{^}}v_fadd_v2f64:
|
||||
; CHECK: v_add_f64
|
||||
; CHECK: v_add_f64
|
||||
; CHECK: buffer_store_dwordx2
|
||||
; CHECK: buffer_store_dwordx2
|
||||
; CHECK: buffer_store_dwordx4
|
||||
define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1,
|
||||
<2 x double> addrspace(1)* %in2) {
|
||||
%r0 = load <2 x double>, <2 x double> addrspace(1)* %in1
|
||||
@ -35,10 +34,9 @@ define void @v_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspac
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}s_fadd_v2f64:
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: buffer_store_dwordx2
|
||||
; CHECK: buffer_store_dwordx2
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: v_add_f64 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}
|
||||
; CHECK: buffer_store_dwordx4
|
||||
define void @s_fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %r0, <2 x double> %r1) {
|
||||
%r2 = fadd <2 x double> %r0, %r1
|
||||
store <2 x double> %r2, <2 x double> addrspace(1)* %out
|
||||
|
@ -49,8 +49,7 @@ define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i
|
||||
|
||||
; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <2 x i32>, <2 x i32> addrspace(1)* %in
|
||||
@ -63,8 +62,7 @@ define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <2 x i32>, <2 x i32> addrspace(1)* %in
|
||||
@ -75,10 +73,8 @@ define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i
|
||||
|
||||
; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <4 x i32>, <4 x i32> addrspace(1)* %in
|
||||
@ -93,10 +89,8 @@ define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <4 x i32>, <4 x i32> addrspace(1)* %in
|
||||
@ -108,14 +102,10 @@ define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i
|
||||
; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <8 x i32>, <8 x i32> addrspace(1)* %in
|
||||
@ -136,15 +126,10 @@ define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <8 x i32>, <8 x i32> addrspace(1)* %in
|
||||
@ -163,29 +148,25 @@ define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <16 x i32>, <16 x i32> addrspace(1)* %in
|
||||
@ -200,23 +181,14 @@ define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
%load = load <16 x i32>, <16 x i32> addrspace(1)* %in
|
||||
@ -269,41 +241,25 @@ define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
; SI-DAG: v_ashrrev_i32
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI: s_endpgm
|
||||
define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
@ -323,41 +279,25 @@ define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx2
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
; SI-DAG: buffer_store_dwordx4
|
||||
|
||||
; SI: s_endpgm
|
||||
define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
|
||||
|
@ -382,10 +382,9 @@ define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace
|
||||
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
|
||||
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
|
||||
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
|
||||
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
|
||||
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
|
||||
; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
|
||||
; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
|
||||
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
|
||||
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
|
||||
; GCN: s_endpgm
|
||||
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
|
||||
%val = load <2 x half>, <2 x half> addrspace(1)* %in
|
||||
@ -395,6 +394,25 @@ define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
|
||||
|
||||
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
|
||||
; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32
|
||||
; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]]
|
||||
; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
|
||||
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN: v_cvt_f32_f16_e32
|
||||
; GCN-NOT: v_cvt_f32_f16_e32
|
||||
|
||||
; GCN: v_cvt_f64_f32_e32
|
||||
; GCN: v_cvt_f64_f32_e32
|
||||
; GCN: v_cvt_f64_f32_e32
|
||||
; GCN-NOT: v_cvt_f64_f32_e32
|
||||
|
||||
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
|
||||
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
|
||||
; GCN: s_endpgm
|
||||
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
|
||||
%val = load <3 x half>, <3 x half> addrspace(1)* %in
|
||||
%cvt = fpext <3 x half> %val to <3 x double>
|
||||
|
@ -70,8 +70,9 @@ define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x fl
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f32:
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
|
||||
@ -79,10 +80,11 @@ define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x fl
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v16f32:
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; FIXMESI: buffer_store_dwordx4
|
||||
; SI: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
|
||||
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
|
||||
@ -202,10 +204,28 @@ endif:
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2f64:
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}}
|
||||
; SI-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}}
|
||||
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
; SI: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
|
||||
; SI: s_mov_b32 m0, [[SCALEDIDX]]
|
||||
; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
|
||||
|
||||
; Increment to next element.
|
||||
; FIXME: Should be able to manipulate m0 directly instead of add and
|
||||
; copy.
|
||||
|
||||
; SI: s_or_b32 [[IDX1:s[0-9]+]], [[SCALEDIDX]], 1
|
||||
; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000
|
||||
; SI-DAG: s_mov_b32 m0, [[IDX1]]
|
||||
; SI: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x double> %a, double 8.0, i32 %b
|
||||
@ -213,9 +233,16 @@ define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x d
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Inline immediate should be folded into v_movreld_b32.
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v2i64:
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
|
||||
; SI-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 5{{$}}
|
||||
; SI-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0{{$}}
|
||||
|
||||
; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
|
||||
; SI-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <2 x i64> %a, i64 5, i32 %b
|
||||
@ -223,12 +250,29 @@ define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64>
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: Should be able to do without stack access. The used stack
|
||||
; space is also 2x what should be required.
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v4f64:
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: SCRATCH_RSRC_DWORD
|
||||
|
||||
; Stack store
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
|
||||
; Write element
|
||||
; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; Stack reload
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; Store result
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
; SI: ScratchSize: 64
|
||||
|
||||
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <4 x double> %a, double 8.0, i32 %b
|
||||
store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
|
||||
@ -236,15 +280,26 @@ define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x d
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}dynamic_insertelement_v8f64:
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: SCRATCH_RSRC_DWORD
|
||||
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}
|
||||
; SI-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}
|
||||
|
||||
; SI: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
|
||||
; SI-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
; SI: ScratchSize: 128
|
||||
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
|
||||
%vecins = insertelement <8 x double> %a, double 8.0, i32 %b
|
||||
store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
|
||||
|
@ -191,9 +191,7 @@ define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_global_store_2_constants_i64:
|
||||
; XGCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx4
|
||||
define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
|
||||
|
||||
@ -203,13 +201,8 @@ define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 {
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}merge_global_store_4_constants_i64:
|
||||
; XGCN: buffer_store_dwordx4
|
||||
; XGCN: buffer_store_dwordx4
|
||||
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx2
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
|
||||
%out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
|
||||
|
@ -2,14 +2,10 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI %s
|
||||
|
||||
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: buffer_load_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_store_dwordx2
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
define void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
|
||||
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
|
||||
|
Loading…
x
Reference in New Issue
Block a user