mirror of
https://github.com/RPCSX/llvm.git
synced 2024-12-13 23:18:51 +00:00
AMDGPU: Support inlineasm for packed instructions
Add the packed types (v2i16 and v2f16) as legal types so that they can be used as inline asm operands. Keep all operations expanded for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296379 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5c641cd1c6
commit
a4e4156e12
@ -126,6 +126,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
|
||||
}
|
||||
|
||||
// On subtargets that report VOP3P instructions, register the two-element
// 16-bit vector types v2i16 and v2f16 with the SReg_32_XM0 register class,
// the same class used for f16 above. Per the commit message, this makes the
// packed types legal so they may be used with inline asm.
if (Subtarget->hasVOP3PInsts()) {
|
||||
addRegisterClass(MVT::v2i16, &AMDGPU::SReg_32_XM0RegClass);
|
||||
addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32_XM0RegClass);
|
||||
}
|
||||
|
||||
computeRegisterProperties(STI.getRegisterInfo());
|
||||
|
||||
// We need to custom lower vector stores from local memory
|
||||
@ -202,7 +207,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
|
||||
// We only support LOAD/STORE and vector manipulation ops for vectors
|
||||
// with > 4 elements.
|
||||
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32, MVT::v2i64, MVT::v2f64}) {
|
||||
for (MVT VT : {MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32,
|
||||
MVT::v2i64, MVT::v2f64}) {
|
||||
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
|
||||
switch (Op) {
|
||||
case ISD::LOAD:
|
||||
@ -372,6 +378,41 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::FMAD, MVT::f16, Legal);
|
||||
}
|
||||
|
||||
// Operation actions for the packed types v2i16 / v2f16, applied only on
// subtargets that report VOP3P instructions.
if (Subtarget->hasVOP3PInsts()) {
|
||||
for (MVT VT : {MVT::v2i16, MVT::v2f16}) {
|
||||
// Walk every builtin ISD opcode and pick an action for this type.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
|
||||
switch (Op) {
|
||||
// Load/store and the vector build/cast/insert/extract opcodes listed here
// are skipped by this loop, so whatever action they already have is kept.
case ISD::LOAD:
|
||||
case ISD::STORE:
|
||||
case ISD::BUILD_VECTOR:
|
||||
case ISD::BITCAST:
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
case ISD::INSERT_SUBVECTOR:
|
||||
case ISD::EXTRACT_SUBVECTOR:
|
||||
case ISD::SCALAR_TO_VECTOR:
|
||||
break;
|
||||
// CONCAT_VECTORS is the only opcode marked Custom for these types.
case ISD::CONCAT_VECTORS:
|
||||
setOperationAction(Op, VT, Custom);
|
||||
break;
|
||||
// Every other opcode is marked Expand.
default:
|
||||
setOperationAction(Op, VT, Expand);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
||||
// Stores of both packed types are promoted to i32 stores.
setOperationAction(ISD::STORE, MVT::v2i16, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v2i16, MVT::i32);
|
||||
setOperationAction(ISD::STORE, MVT::v2f16, Promote);
|
||||
AddPromotedToType(ISD::STORE, MVT::v2f16, MVT::i32);
|
||||
|
|
||||
// Loads of both packed types are likewise promoted to i32 loads.
setOperationAction(ISD::LOAD, MVT::v2i16, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v2i16, MVT::i32);
|
||||
setOperationAction(ISD::LOAD, MVT::v2f16, Promote);
|
||||
AddPromotedToType(ISD::LOAD, MVT::v2f16, MVT::i32);
|
||||
}
|
||||
|
||||
setTargetDAGCombine(ISD::FADD);
|
||||
setTargetDAGCombine(ISD::FSUB);
|
||||
setTargetDAGCombine(ISD::FMINNUM);
|
||||
|
57
test/CodeGen/AMDGPU/inlineasm-packed.ll
Normal file
57
test/CodeGen/AMDGPU/inlineasm-packed.ll
Normal file
@ -0,0 +1,57 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx901 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN %s
|
||||
|
||||
; Passes a <2 x i16> argument as the "r" input of an inline asm s_mov_b32 whose
; "=r" output is an i32, then stores that i32 to %out. The expected output is
; an s_mov_b32 between two s registers.
; GCN-LABEL: {{^}}inline_asm_input_v2i16:
|
||||
; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
define void @inline_asm_input_v2i16(i32 addrspace(1)* %out, <2 x i16> %in) #0 {
|
||||
entry:
|
||||
%val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x i16> %in) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Passes a <2 x half> argument as the "r" input of an inline asm s_mov_b32 whose
; "=r" output is an i32, then stores that i32 to %out. The destination register
; is matched with a pattern rather than a fixed name, as in the sibling tests,
; because the register allocator chooses it.
; GCN-LABEL: {{^}}inline_asm_input_v2f16:
|
||||
; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
define void @inline_asm_input_v2f16(i32 addrspace(1)* %out, <2 x half> %in) #0 {
|
||||
entry:
|
||||
%val = call i32 asm "s_mov_b32 $0, $1", "=r,r"(<2 x half> %in) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returns a <2 x i16> through the "=r" output of an inline asm s_mov_b32 whose
; "r" input is an i32 argument, then stores the vector to %out. The expected
; output is an s_mov_b32 between two s registers.
; GCN-LABEL: {{^}}inline_asm_output_v2i16:
|
||||
; GCN: s_mov_b32 s{{[0-9]+}}, s{{[0-9]+}}
|
||||
define void @inline_asm_output_v2i16(<2 x i16> addrspace(1)* %out, i32 %in) #0 {
|
||||
entry:
|
||||
%val = call <2 x i16> asm "s_mov_b32 $0, $1", "=r,r"(i32 %in) #0
|
||||
store <2 x i16> %val, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Returns a <2 x half> through the "=v" output of an inline asm v_mov_b32 whose
; "r" input is an i32 argument, then stores the vector to %out. The expected
; output is a v_mov_b32 from an s register into a v register.
; GCN-LABEL: {{^}}inline_asm_output_v2f16:
|
||||
; GCN: v_mov_b32 v{{[0-9]+}}, s{{[0-9]+}}
|
||||
define void @inline_asm_output_v2f16(<2 x half> addrspace(1)* %out, i32 %in) #0 {
|
||||
entry:
|
||||
%val = call <2 x half> asm "v_mov_b32 $0, $1", "=v,r"(i32 %in) #0
|
||||
store <2 x half> %val, <2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Uses the packed instruction v_pk_add_u16 from inline asm with <2 x i16>
; operands: %in0 through an "r" input, %in1 through a "v" input, and the result
; through a "=v" output, which is then stored to %out.
; GCN-LABEL: {{^}}inline_asm_packed_v2i16:
|
||||
; GCN: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
define void @inline_asm_packed_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %in0, <2 x i16> %in1) #0 {
|
||||
entry:
|
||||
%val = call <2 x i16> asm "v_pk_add_u16 $0, $1, $2", "=v,r,v"(<2 x i16> %in0, <2 x i16> %in1) #0
|
||||
store <2 x i16> %val, <2 x i16> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Uses the packed instruction v_pk_add_f16 from inline asm with <2 x half>
; operands: %in0 through an "r" input, %in1 through a "v" input, and the result
; through a "=v" output, which is then stored to %out.
; GCN-LABEL: {{^}}inline_asm_packed_v2f16:
|
||||
; GCN: v_pk_add_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
|
||||
define void @inline_asm_packed_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %in0, <2 x half> %in1) #0 {
|
||||
entry:
|
||||
%val = call <2 x half> asm "v_pk_add_f16 $0, $1, $2", "=v,r,v"(<2 x half> %in0, <2 x half> %in1) #0
|
||||
store <2 x half> %val, <2 x half> addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user