diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td index 84d3118e6ad..d26be326094 100644 --- a/lib/Target/R600/AMDGPUCallingConv.td +++ b/lib/Target/R600/AMDGPUCallingConv.td @@ -19,7 +19,8 @@ def CC_SI : CallingConv<[ CCIfInReg>>, CCIfInReg(Op.getOperand(1))->getZExtValue(); + + switch (IntrinsicID) { + case AMDGPUIntrinsic::SI_tbuffer_store: { + SDLoc DL(Op); + SDValue Ops [] = { + Chain, + ResourceDescriptorToi128(Op.getOperand(2), DAG), + Op.getOperand(3), + Op.getOperand(4), + Op.getOperand(5), + Op.getOperand(6), + Op.getOperand(7), + Op.getOperand(8), + Op.getOperand(9), + Op.getOperand(10), + Op.getOperand(11), + Op.getOperand(12), + Op.getOperand(13), + Op.getOperand(14) + }; + EVT VT = Op.getOperand(3).getValueType(); + + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(), + MachineMemOperand::MOStore, + VT.getSizeInBits() / 8, 4); + return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, + Op->getVTList(), Ops, + sizeof(Ops)/sizeof(Ops[0]), VT, MMO); + } + default: + break; + } } return SDValue(); } diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 09d5f01dbfb..e7d70f420cb 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -21,6 +21,25 @@ def SIload_constant : SDNode<"AMDGPUISD::LOAD_CONSTANT", [SDNPMayLoad, SDNPMemOperand] >; +def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT", + SDTypeProfile<0, 13, + [SDTCisVT<0, i128>, // rsrc(SGPR) + SDTCisVT<1, iAny>, // vdata(VGPR) + SDTCisVT<2, i32>, // num_channels(imm) + SDTCisVT<3, i32>, // vaddr(VGPR) + SDTCisVT<4, i32>, // soffset(SGPR) + SDTCisVT<5, i32>, // inst_offset(imm) + SDTCisVT<6, i32>, // dfmt(imm) + SDTCisVT<7, i32>, // nfmt(imm) + SDTCisVT<8, i32>, // offen(imm) + SDTCisVT<9, i32>, // idxen(imm) + SDTCisVT<10, i32>, // glc(imm) + SDTCisVT<11, i32>, // slc(imm) + SDTCisVT<12, i32> // tfe(imm) + ]>, + [SDNPMayStore, SDNPMemOperand, SDNPHasChain] +>; + def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT", SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, i128>, SDTCisVT<2, i16>, SDTCisVT<3, i32>]> @@ -65,6 +84,14 @@ def IMM8bitDWORD : ImmLeaf < }]> >; +def as_i1imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), MVT::i1); +}]>; + +def as_i8imm : SDNodeXFormgetTargetConstant(N->getZExtValue(), MVT::i8); +}]>; + def as_i16imm : SDNodeXFormgetTargetConstant(N->getSExtValue(), MVT::i16); }]>; diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 14a189a07af..99fedcb95a4 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -477,10 +477,10 @@ def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < //def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "TBUFFER_LOAD_FORMAT_XY", []>; //def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "TBUFFER_LOAD_FORMAT_XYZ", []>; def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "TBUFFER_LOAD_FORMAT_XYZW", VReg_128>; -//def TBUFFER_STORE_FORMAT_X : MTBUF_ <0x00000004, "TBUFFER_STORE_FORMAT_X", []>; -//def TBUFFER_STORE_FORMAT_XY : MTBUF_ <0x00000005, "TBUFFER_STORE_FORMAT_XY", []>; -//def TBUFFER_STORE_FORMAT_XYZ : MTBUF_ <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", []>; -//def TBUFFER_STORE_FORMAT_XYZW : MTBUF_ <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", []>; +def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "TBUFFER_STORE_FORMAT_X", VReg_32>; +def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "TBUFFER_STORE_FORMAT_XY", VReg_64>; +def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "TBUFFER_STORE_FORMAT_XYZ", VReg_128>; +def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "TBUFFER_STORE_FORMAT_XYZW", VReg_128>; let mayLoad = 1 in { @@ -1881,6 +1881,27 @@ defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; defm : MUBUFStore_Pattern ; +//===----------------------------------------------------------------------===// +// MTBUF Patterns +//===----------------------------------------------------------------------===// + +// TBUFFER_STORE_FORMAT_*, addr64=0 +class MTBUF_StoreResource : Pat< + (SItbuffer_store i128:$rsrc, vt:$vdata, num_channels, i32:$vaddr, + i32:$soffset, imm:$inst_offset, imm:$dfmt, + imm:$nfmt, imm:$offen, imm:$idxen, + imm:$glc, imm:$slc, imm:$tfe), + (opcode + $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen), + (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc, + (as_i1imm $slc), (as_i1imm $tfe), $soffset) +>; + +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; +def : MTBUF_StoreResource ; + /********** ====================== **********/ /********** Indirect adressing **********/ /********** ====================== **********/ diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index d6e26adf65b..7fcc9645211 100644 --- a/lib/Target/R600/SIIntrinsics.td +++ b/lib/Target/R600/SIIntrinsics.td @@ -20,6 +20,24 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; + // Fully-flexible TBUFFER_STORE_FORMAT_* except for the ADDR64 bit, which is not exposed + def int_SI_tbuffer_store : Intrinsic < + [], + [llvm_anyint_ty, // rsrc(SGPR) + llvm_anyint_ty, // vdata(VGPR), overloaded for types i32, v2i32, v4i32 + llvm_i32_ty, // num_channels(imm), selects opcode suffix: 1=X, 2=XY, 3=XYZ, 4=XYZW + llvm_i32_ty, // vaddr(VGPR) + llvm_i32_ty, // soffset(SGPR) + llvm_i32_ty, // inst_offset(imm) + llvm_i32_ty, // dfmt(imm) + llvm_i32_ty, // nfmt(imm) + llvm_i32_ty, // offen(imm) + llvm_i32_ty, // idxen(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty], // tfe(imm) + []>; + class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_sample : Sample; diff --git a/test/CodeGen/R600/llvm.SI.tbuffer.store.ll b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll new file mode 100644 index 00000000000..92d16c560aa --- /dev/null +++ b/test/CodeGen/R600/llvm.SI.tbuffer.store.ll @@ -0,0 +1,44 @@ +;RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s + +;CHECK_LABEL: @test1 +;CHECK: TBUFFER_STORE_FORMAT_XYZW {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 32, -1, 0, -1, 0, 14, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0 +define void @test1(i32 %a1, i32 %vaddr) { + %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, + i32 4, i32 %vaddr, i32 0, i32 32, i32 14, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test2 +;CHECK: TBUFFER_STORE_FORMAT_XYZ {{VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+_VGPR[0-9]+}}, 24, -1, 0, -1, 0, 13, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0 +define void @test2(i32 %a1, i32 %vaddr) { + %vdata = insertelement <4 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v4i32(<16 x i8> undef, <4 x i32> %vdata, + i32 3, i32 %vaddr, i32 0, i32 24, i32 13, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test3 +;CHECK: TBUFFER_STORE_FORMAT_XY {{VGPR[0-9]+_VGPR[0-9]+}}, 16, -1, 0, -1, 0, 11, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0 +define void @test3(i32 %a1, i32 %vaddr) { + %vdata = insertelement <2 x i32> undef, i32 %a1, i32 0 + call void @llvm.SI.tbuffer.store.v2i32(<16 x i8> undef, <2 x i32> %vdata, + i32 2, i32 %vaddr, i32 0, i32 16, i32 11, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +;CHECK_LABEL: @test4 +;CHECK: TBUFFER_STORE_FORMAT_X {{VGPR[0-9]+}}, 8, -1, 0, -1, 0, 4, 4, {{VGPR[0-9]+}}, {{SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+_SGPR[0-9]+}}, -1, 0, 0 +define void @test4(i32 %vdata, i32 %vaddr) { + call void @llvm.SI.tbuffer.store.i32(<16 x i8> undef, i32 %vdata, + i32 1, i32 %vaddr, i32 0, i32 8, i32 4, i32 4, i32 1, i32 0, i32 1, + i32 1, i32 0) + ret void +} + +declare void @llvm.SI.tbuffer.store.i32(<16 x i8>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) +declare void @llvm.SI.tbuffer.store.v2i32(<16 x i8>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) +declare void @llvm.SI.tbuffer.store.v4i32(<16 x i8>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)