R600/SI: Fix insertelement with dynamic indices.

This didn't work for any integer vectors, and didn't
work with some sizes of float vectors. This should now
work with all sizes of float and i32 vectors.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@200619 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2014-02-02 00:05:35 +00:00
parent eb97c0499b
commit b2abb9752e
2 changed files with 186 additions and 18 deletions

View File

@ -1671,8 +1671,13 @@ def : BitConvert <i128, v4i32, VReg_128>;
def : BitConvert <v8i32, v32i8, SReg_256>;
def : BitConvert <v32i8, v8i32, SReg_256>;
def : BitConvert <v8i32, v32i8, VReg_256>;
def : BitConvert <v8i32, v8f32, VReg_256>;
def : BitConvert <v8f32, v8i32, VReg_256>;
def : BitConvert <v32i8, v8i32, VReg_256>;
def : BitConvert <v16i32, v16f32, VReg_512>;
def : BitConvert <v16f32, v16i32, VReg_512>;
/********** =================== **********/
/********** Src & Dst modifiers **********/
/********** =================== **********/
@ -2064,7 +2069,7 @@ def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
/********** Indirect addressing **********/
/********** ====================== **********/
multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST IndDst> {
// 1. Extract with offset
def : Pat<
@ -2080,21 +2085,26 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, SI_INDIRECT_DST IndDst> {
// 3. Insert with offset
def : Pat<
(vector_insert vt:$vec, f32:$val, (add i32:$idx, imm:$off)),
(vector_insert vt:$vec, eltvt:$val, (add i32:$idx, imm:$off)),
(IndDst (IMPLICIT_DEF), $vec, $idx, imm:$off, $val)
>;
// 4. Insert without offset
def : Pat<
(vector_insert vt:$vec, f32:$val, i32:$idx),
(vector_insert vt:$vec, eltvt:$val, i32:$idx),
(IndDst (IMPLICIT_DEF), $vec, $idx, 0, $val)
>;
}
defm : SI_INDIRECT_Pattern <v2f32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4f32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8f32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16f32, SI_INDIRECT_DST_V16>;
defm : SI_INDIRECT_Pattern <v2f32, f32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4f32, f32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8f32, f32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16f32, f32, SI_INDIRECT_DST_V16>;
defm : SI_INDIRECT_Pattern <v2i32, i32, SI_INDIRECT_DST_V2>;
defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>;
defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>;
defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>;
/********** =============== **********/
/********** Conditions **********/

View File

@ -1,17 +1,175 @@
; REQUIRES: asserts
; XFAIL: *
; RUN: llc < %s -march=r600 -mcpu=redwood -o %t
; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
define void @var_insert(<4 x i32> addrspace(1)* %out, <4 x i32> %x, i32 %val, i32 %idx) nounwind {
entry:
%tmp3 = insertelement <4 x i32> %x, i32 %val, i32 %idx ; <<4 x i32>> [#uses=1]
store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out
; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.
; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?
; Inserts the constant 5.0 at constant index 0 of <4 x float> %a and stores
; the resulting vector to %out.
; NOTE(review): the instruction patterns below carry no check prefix, so they
; read as inactive notes rather than enforced checks. Their `{{[}}` also looks
; like it is missing the backslash used by the later patterns in this file
; (`{{\[}}`) -- confirm both before enabling them.
; SI-LABEL: @insertelement_v4f32_0:
; S_LOAD_DWORDX4 s{{[}}[[LOW_REG:[0-9]+]]:
; V_MOV_B32_e32
; V_MOV_B32_e32 [[CONSTREG:v[0-9]+]], 5.000000e+00
; V_MOV_B32_e32 v[[LOW_REG]], [[CONSTREG]]
; BUFFER_STORE_DWORDX4 v{{[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
define void @var_extract(i32 addrspace(1)* %out, <4 x i32> %x, i32 %idx) nounwind {
entry:
%tmp3 = extractelement <4 x i32> %x, i32 %idx ; <<i32>> [#uses=1]
store i32 %tmp3, i32 addrspace(1)* %out
; Inserts the constant 5.0 at constant index 1 of <4 x float> %a and stores
; the resulting vector to %out. Only the function label is checked; no
; instruction patterns are given for this case.
; SI-LABEL: @insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
; Inserts the constant 5.0 at constant index 2 of <4 x float> %a and stores
; the resulting vector to %out. Only the function label is checked; no
; instruction patterns are given for this case.
; SI-LABEL: @insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
; Inserts the constant 5.0 at constant index 3 (the last lane) of
; <4 x float> %a and stores the resulting vector to %out. Only the function
; label is checked; no instruction patterns are given for this case.
; SI-LABEL: @insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
; Integer counterpart of the constant-index cases: inserts i32 999 at constant
; index 0 of <4 x i32> %a and stores the resulting vector to %out. Only the
; function label is checked.
; SI-LABEL: @insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
%vecins = insertelement <4 x i32> %a, i32 999, i32 0
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; Inserts the constant 5.0 into <2 x float> %a at the runtime index %b and
; stores the resulting vector to %out.
; The checks expect the constant to be placed in a VGPR by V_MOV_B32_e32, that
; register to be the source of a V_MOVRELD_B32_e32, and the 64-bit store to
; use a register range that begins at the V_MOVRELD destination register.
; SI-LABEL: @dynamic_insertelement_v2f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
%vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
ret void
}
; Inserts the constant 5.0 into <4 x float> %a at the runtime index %b and
; stores the resulting vector to %out.
; The checks expect the constant to be placed in a VGPR by V_MOV_B32_e32, that
; register to be the source of a V_MOVRELD_B32_e32, and the 128-bit store to
; use a register range that begins at the V_MOVRELD destination register.
; SI-LABEL: @dynamic_insertelement_v4f32:
; SI: V_MOV_B32_e32 [[CONST:v[0-9]+]], 5.000000e+00
; SI: V_MOVRELD_B32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; SI: BUFFER_STORE_DWORDX4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
%vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
ret void
}
; Inserts the constant 5.0 into <8 x float> %a at the runtime index %b and
; stores the resulting vector to %out.
; Only the function label is enforced. The two store patterns use the FIXMESI
; spelling, which differs from the check prefix on the RUN line, so they are
; presumably disabled until 8-element vectors are stored as 128-bit stores
; (see the FIXME near the top of this test).
; SI-LABEL: @dynamic_insertelement_v8f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
%vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
ret void
}
; Inserts the constant 5.0 into <16 x float> %a at the runtime index %b and
; stores the resulting vector to %out.
; Only the function label is enforced. The four store patterns use the FIXMESI
; spelling, which differs from the check prefix on the RUN line, so they are
; presumably disabled until 16-element vectors are stored as 128-bit stores
; (see the FIXME near the top of this test).
; SI-LABEL: @dynamic_insertelement_v16f32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
%vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
ret void
}
; Inserts i32 5 into <2 x i32> %a at the runtime index %b and stores the
; resulting vector to %out.
; The check only requires that a single 64-bit buffer store is emitted; it
; does not pin how the element is inserted.
; SI-LABEL: @dynamic_insertelement_v2i32:
; SI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i32> %a, i32 5, i32 %b
store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
ret void
}
; Inserts i32 5 into <4 x i32> %a at the runtime index %b and stores the
; resulting vector to %out.
; The check only requires that a single 128-bit buffer store is emitted; it
; does not pin how the element is inserted.
; SI-LABEL: @dynamic_insertelement_v4i32:
; SI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i32> %a, i32 5, i32 %b
store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
ret void
}
; Inserts i32 5 into <8 x i32> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced. The two store patterns use the FIXMESI
; spelling, which differs from the check prefix on the RUN line, so they are
; presumably disabled until 8-element vectors are stored as 128-bit stores
; (see the FIXME near the top of this test).
; SI-LABEL: @dynamic_insertelement_v8i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i32> %a, i32 5, i32 %b
store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
ret void
}
; Inserts i32 5 into <16 x i32> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced. The four store patterns use the FIXMESI
; spelling, which differs from the check prefix on the RUN line, so they are
; presumably disabled until 16-element vectors are stored as 128-bit stores
; (see the FIXME near the top of this test).
; SI-LABEL: @dynamic_insertelement_v16i32:
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i32> %a, i32 5, i32 %b
store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
ret void
}
; Inserts i16 5 into <2 x i16> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; NOTE(review): the store claims align 8 although <2 x i16> is 4 bytes wide --
; confirm the over-alignment is intentional.
; SI-LABEL: @dynamic_insertelement_v2i16:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i16> %a, i16 5, i32 %b
store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
ret void
}
; Inserts i16 5 into <4 x i16> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; NOTE(review): the store claims align 16 although <4 x i16> is 8 bytes wide --
; confirm the over-alignment is intentional.
; SI-LABEL: @dynamic_insertelement_v4i16:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i16> %a, i16 5, i32 %b
store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 16
ret void
}
; Inserts i8 5 into <2 x i8> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; NOTE(review): the store claims align 8 although <2 x i8> is 2 bytes wide --
; confirm the over-alignment is intentional.
; SI-LABEL: @dynamic_insertelement_v2i8:
; FIXMESI: BUFFER_STORE_USHORT
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <2 x i8> %a, i8 5, i32 %b
store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
ret void
}
; Inserts i8 5 into <4 x i8> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; NOTE(review): the store claims align 16 although <4 x i8> is 4 bytes wide --
; confirm the over-alignment is intentional.
; SI-LABEL: @dynamic_insertelement_v4i8:
; FIXMESI: BUFFER_STORE_DWORD
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <4 x i8> %a, i8 5, i32 %b
store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 16
ret void
}
; Inserts i8 5 into <8 x i8> %a at the runtime index %b and stores the
; resulting vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; NOTE(review): the store claims align 16 although <8 x i8> is 8 bytes wide --
; confirm the over-alignment is intentional.
; SI-LABEL: @dynamic_insertelement_v8i8:
; FIXMESI: BUFFER_STORE_DWORDX2
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <8 x i8> %a, i8 5, i32 %b
store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 16
ret void
}
; Inserts i8 5 into <16 x i8> %a at the runtime index %b and stores the
; resulting 16-byte vector to %out.
; Only the function label is enforced; the store pattern uses the FIXMESI
; spelling and is presumably a placeholder for output that is not yet produced.
; SI-LABEL: @dynamic_insertelement_v16i8:
; FIXMESI: BUFFER_STORE_DWORDX4
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
%vecins = insertelement <16 x i8> %a, i8 5, i32 %b
store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
ret void
}