diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 4fa576f8055..bf1cf8832e7 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -101,11 +101,16 @@ private:
                          SDValue &TFE) const;
   bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                          SDValue &Offset) const;
+  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+                         SDValue &VAddr, SDValue &Offset,
+                         SDValue &SLC) const;
   bool SelectMUBUFScratch(SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                           SDValue &SOffset, SDValue &ImmOffset) const;
   bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                          SDValue &Offset, SDValue &GLC, SDValue &SLC,
                          SDValue &TFE) const;
+  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
+                         SDValue &Offset, SDValue &GLC) const;
   SDNode *SelectAddrSpaceCast(SDNode *N);
   bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
   bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -909,6 +914,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
   return false;
 }
 
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &VAddr, SDValue &Offset,
+                                           SDValue &SLC) const {
+  SLC = CurDAG->getTargetConstant(0, MVT::i1);
+
+  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, Offset);
+}
+
 static SDValue buildRSRC(SelectionDAG *DAG, SDLoc DL, SDValue Ptr,
                          uint32_t RsrcDword1,
                          uint64_t RsrcDword2And3) {
@@ -1019,6 +1032,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
   return false;
 }
 
+bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
+                                           SDValue &Soffset, SDValue &Offset,
+                                           SDValue &GLC) const {
+  SDValue SLC, TFE;
+
+  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
+}
+
 // FIXME: This is incorrect and only enough to be able to compile.
 SDNode *AMDGPUDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
   AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(N);
diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index d152c884522..a6086270968 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -380,6 +380,14 @@ def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
   return dyn_cast<MemIntrinsicSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
 }]>;
 
+class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
+  (ops node:$ptr, node:$value),
+  (atomic_op node:$ptr, node:$value),
+  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
+>;
+
+def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+
 //===----------------------------------------------------------------------===//
 // Misc Pattern Fragments
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td
index 2d91d496c28..55588f37623 100644
--- a/lib/Target/R600/SIInstrInfo.td
+++ b/lib/Target/R600/SIInstrInfo.td
@@ -196,8 +196,10 @@ def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
 
 def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
 def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 4, "SelectMUBUFAddr64">;
 def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
 def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
 
 def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
 def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
@@ -929,9 +931,10 @@ class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc, string noRetOp =
   let mayLoad = 1;
 }
 
-class MUBUFAddr64Table <bit is_addr64> {
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
   bit IsAddr64 = is_addr64;
+  string OpName = NAME # suffix;
 }
 
 class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF <
@@ -947,6 +950,80 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU
   let mayLoad = 0;
 }
 
+class MUBUFAtomicAddr64 <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+    : MUBUF <op, outs, ins, asm, pattern> {
+
+  let offen = 0;
+  let idxen = 0;
+  let addr64 = 1;
+  let tfe = 0;
+  let lds = 0;
+  let soffset = 128;
+}
+
+class MUBUFAtomicOffset <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern>
+    : MUBUF <op, outs, ins, asm, pattern> {
+
+  let offen = 0;
+  let idxen = 0;
+  let addr64 = 0;
+  let tfe = 0;
+  let lds = 0;
+  let vaddr = 0;
+}
+
+multiclass MUBUF_Atomic <bits<7> op, string name, RegisterClass rc,
+                         ValueType vt, SDPatternOperator atomic> {
+
+  let mayStore = 1, mayLoad = 1 in {
+
+    // No return variants
+    let glc = 0 in {
+
+      def _ADDR64 : MUBUFAtomicAddr64 <
+        op, (outs),
+        (ins rc:$vdata, SReg_128:$srsrc, VReg_64:$vaddr,
+             mbuf_offset:$offset, slc:$slc),
+        name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#"$slc", []
+      >, MUBUFAddr64Table<1>, AtomicNoRet<NAME#"_ADDR64", 0>;
+
+      def _OFFSET : MUBUFAtomicOffset <
+        op, (outs),
+        (ins rc:$vdata, SReg_128:$srsrc, mbuf_offset:$offset,
+             SSrc_32:$soffset, slc:$slc),
+        name#" $vdata, $srsrc, $soffset"#"$offset"#"$slc", []
+      >, MUBUFAddr64Table<0>, AtomicNoRet<NAME#"_OFFSET", 0>;
+    } // glc = 0
+
+    // Variants that return values
+    let glc = 1, Constraints = "$vdata = $vdata_in",
+        DisableEncoding = "$vdata_in" in {
+
+      def _RTN_ADDR64 : MUBUFAtomicAddr64 <
+        op, (outs rc:$vdata),
+        (ins rc:$vdata_in, SReg_128:$srsrc, VReg_64:$vaddr,
+             mbuf_offset:$offset, slc:$slc),
+        name#" $vdata, $vaddr, $srsrc, 0 addr64"#"$offset"#" glc"#"$slc",
+        [(set vt:$vdata,
+         (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i16:$offset,
+                                    i1:$slc), vt:$vdata_in))]
+      >, MUBUFAddr64Table<1, "_RTN">, AtomicNoRet<NAME#"_ADDR64", 1>;
+
+      def _RTN_OFFSET : MUBUFAtomicOffset <
+        op, (outs rc:$vdata),
+        (ins rc:$vdata_in, SReg_128:$srsrc, mbuf_offset:$offset,
+             SSrc_32:$soffset, slc:$slc),
+        name#" $vdata, $srsrc, $soffset"#"$offset"#" glc $slc",
+        [(set vt:$vdata,
+         (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
+                                    i1:$slc), vt:$vdata_in))]
+      >, MUBUFAddr64Table<0, "_RTN">, AtomicNoRet<NAME#"_OFFSET", 1>;
+
+    } // glc = 1
+
+  } // mayStore = 1, mayLoad = 1
+}
+
 multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass,
                               ValueType load_vt = i32,
                               SDPatternOperator ld = null_frag> {
@@ -1292,7 +1369,7 @@ def getMCOpcode : InstrMapping {
 
 def getAddr64Inst : InstrMapping {
   let FilterClass = "MUBUFAddr64Table";
-  let RowFields = ["NAME"];
+  let RowFields = ["OpName"];
   let ColFields = ["IsAddr64"];
   let KeyCol = ["0"];
   let ValueCols = [["1"]];
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index cbf6d86e0e4..40a26e85dd0 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -896,7 +896,9 @@ defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
 >;
 //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>;
 //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>;
-//def BUFFER_ATOMIC_ADD : MUBUF_ <0x00000032, "BUFFER_ATOMIC_ADD", []>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
+  0x00000032, "BUFFER_ATOMIC_ADD", VReg_32, i32, atomic_add_global
+>;
 //def BUFFER_ATOMIC_SUB : MUBUF_ <0x00000033, "BUFFER_ATOMIC_SUB", []>;
 //def BUFFER_ATOMIC_RSUB : MUBUF_ <0x00000034, "BUFFER_ATOMIC_RSUB", []>;
 //def BUFFER_ATOMIC_SMIN : MUBUF_ <0x00000035, "BUFFER_ATOMIC_SMIN", []>;
diff --git a/test/CodeGen/R600/global_atomics.ll b/test/CodeGen/R600/global_atomics.ll
new file mode 100644
index 00000000000..665913fbdff
--- /dev/null
+++ b/test/CodeGen/R600/global_atomics.ll
@@ -0,0 +1,39 @@
+; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
+
+; FUNC-LABEL: @atomic_load_i32_offset
+; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
+define void @atomic_load_i32_offset(i32 addrspace(1)* %out, i32 %in) {
+entry:
+  %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_ret_offset
+; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
+; SI: BUFFER_STORE_DWORD [[RET]]
+define void @atomic_load_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
+entry:
+  %0 = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_addr64
+; SI: BUFFER_ATOMIC_ADD v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
+define void @atomic_load_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+  ret void
+}
+
+; FUNC-LABEL: @atomic_load_i32_ret_addr64
+; SI: BUFFER_ATOMIC_ADD [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
+; SI: BUFFER_STORE_DWORD [[RET]]
+define void @atomic_load_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
+entry:
+  %ptr = getelementptr i32 addrspace(1)* %out, i64 %index
+  %0 = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
+  store i32 %0, i32 addrspace(1)* %out2
+  ret void
+}