AMDGPU/R600: add a typed RAT store intrinsic, instructions, and a test.

What this patch adds:
  * IntrinsicsAMDGPU.td: int_r600_rat_store_typed, an intrinsic with no
    results and three operands (v4i32 data, v4i32 index, i32 RAT ID),
    exposed as __builtin_r600_rat_store_typed.
  * EvergreenInstructions.td: class CF_MEM_RAT_STORE_TYPED, a CF_MEM_RAT with
    rat_inst 0x1 whose asm string is
    "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr", with ", $eop" appended
    only when has_eop is set; and RAT_STORE_TYPED_eg (has_eop = 1), defined
    inside the "usesCustomInserter = 1" region.
  * CaymanInstructions.td: RAT_STORE_TYPED_cm (has_eop = 0, eop forced to 0).
  * R600ISelLowering.cpp: a custom-inserter case for RAT_STORE_TYPED_eg that
    re-emits the instruction with operands 0..2 plus an immediate taken from
    isEOP(I).
  * test/CodeGen/AMDGPU/store_typed.ll: checks the printed instruction for
    RAT 0 and RAT 11 on redwood (EG) and cayman (CM).

NOTE(review): this text was rebuilt from a copy whose line structure and
indentation had been collapsed and whose HTML-tag-like "<...>" runs had been
stripped.
  - Blank context lines were placed from the hunk line counts and the usual
    3 lines of context on each side; indentation inside lines is a
    reconstruction and may not match upstream byte-for-byte.
  - "<bits<1> has_eop>" on the new class was restored from the "<0>"/"<1>"
    instantiations and the "!if(has_eop, ...)" use -- confirm against upstream.
  - Context lines and hunk labels that lost template arguments
    ("RAT_STORE_DWORD ;", "VTX_READ_cm buffer_id, ...", "VTX_READ {",
    "VTX_READ_eg buffer_id, ...", "CF_MEM_RAT rat_inst, ...") are left exactly
    as found; restore them from the upstream files before applying.

diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 12943a2bde1..cad2b56a35f 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -33,6 +33,14 @@ defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tgid">;
 defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
                                        "__builtin_r600_read_tidig">;
+
+def int_r600_rat_store_typed :
+  // 1st parameter: Data
+  // 2nd parameter: Index
+  // 3rd parameter: Constant RAT ID
+  Intrinsic<[], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], []>,
+  GCCBuiltin<"__builtin_r600_rat_store_typed">;
+
 } // End TargetPrefix = "r600"
 
 let TargetPrefix = "AMDGPU" in {
diff --git a/lib/Target/AMDGPU/CaymanInstructions.td b/lib/Target/AMDGPU/CaymanInstructions.td
index ba4df82a6d3..a6c3785c815 100644
--- a/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/lib/Target/AMDGPU/CaymanInstructions.td
@@ -82,6 +82,10 @@ def RAT_STORE_DWORD32 : RAT_STORE_DWORD ;
 def RAT_STORE_DWORD64 : RAT_STORE_DWORD ;
 def RAT_STORE_DWORD128 : RAT_STORE_DWORD ;
 
+def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> {
+  let eop = 0; // This bit is not used on Cayman.
+}
+
 class VTX_READ_cm buffer_id, dag outs, list pattern>
     : VTX_WORD0_cm, VTX_READ {
 
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 7adcd46fe19..779a14e95d2 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -40,6 +40,15 @@ class CF_MEM_RAT rat_inst, bits<4> rat_id, dag ins, string name,
   : EG_CF_RAT <0x56, rat_inst, rat_id, 0xf /* mask */, (outs), ins,
                "MEM_RAT "#name, pattern>;
 
+class CF_MEM_RAT_STORE_TYPED<bits<1> has_eop>
+  : CF_MEM_RAT <0x1, ?, (ins R600_Reg128:$rw_gpr, R600_Reg128:$index_gpr,
+                         i32imm:$rat_id, InstFlag:$eop),
+                "STORE_TYPED RAT($rat_id) $rw_gpr, $index_gpr"
+                #!if(has_eop, ", $eop", ""),
+                [(int_r600_rat_store_typed R600_Reg128:$rw_gpr,
+                                           R600_Reg128:$index_gpr,
+                                           (i32 imm:$rat_id))]>;
+
 def RAT_MSKOR : CF_MEM_RAT <0x11, 0,
   (ins R600_Reg128:$rw_gpr, R600_TReg32_X:$index_gpr),
   "MSKOR $rw_gpr.XW, $index_gpr",
@@ -105,6 +114,8 @@ def RAT_WRITE_CACHELESS_128_eg : CF_MEM_RAT_CACHELESS <0x2, 0, 0xf,
   [(global_store v4i32:$rw_gpr, i32:$index_gpr)]
 >;
 
+def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>;
+
 } // End usesCustomInserter = 1
 
 class VTX_READ_eg buffer_id, dag outs, list pattern>
diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp
index 4ed5c881491..124a9c6e0f5 100644
--- a/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -286,6 +286,14 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
             .addImm(isEOP(I)); // Set End of program bit
     break;
   }
+  case AMDGPU::RAT_STORE_TYPED_eg: {
+    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
+            .addOperand(MI->getOperand(0))
+            .addOperand(MI->getOperand(1))
+            .addOperand(MI->getOperand(2))
+            .addImm(isEOP(I)); // Set End of program bit
+    break;
+  }
 
   case AMDGPU::TXD: {
     unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
diff --git a/test/CodeGen/AMDGPU/store_typed.ll b/test/CodeGen/AMDGPU/store_typed.ll
new file mode 100644
index 00000000000..515fcf04f40
--- /dev/null
+++ b/test/CodeGen/AMDGPU/store_typed.ll
@@ -0,0 +1,24 @@
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG --check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck --check-prefix=CM --check-prefix=FUNC %s
+
+; store to rat 0
+; FUNC-LABEL: {{^}}store_typed_rat0:
+; EG: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(0) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat0(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 0)
+  ret void
+}
+
+; store to rat 11
+; FUNC-LABEL: {{^}}store_typed_rat11:
+; EG: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}, 1
+; CM: MEM_RAT STORE_TYPED RAT(11) {{T[0-9]+, T[0-9]+}}
+
+define void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index) {
+  call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
+  ret void
+}
+
+declare void @llvm.r600.rat.store.typed(<4 x i32>, <4 x i32>, i32)