R600: Use KCache for kernel arguments

Reviewed-by: Vincent Lejeune <vljn at ovi.com>

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@186918 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tom Stellard 2013-07-23 01:48:18 +00:00
parent 125f9d0ca2
commit a7eea0568c
21 changed files with 108 additions and 139 deletions

View File

@ -74,6 +74,12 @@ enum AddressSpaces {
ADDRESS_NONE = 5, ///< Address space for unknown memory.
PARAM_D_ADDRESS = 6, ///< Address space for direct addressable parameter memory (CONST0)
PARAM_I_ADDRESS = 7, ///< Address space for indirect addressable parameter memory (VTX1)
// Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on this
// order to be able to dynamically index a constant buffer, for example:
//
// ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
CONSTANT_BUFFER_0 = 8,
CONSTANT_BUFFER_1 = 9,
CONSTANT_BUFFER_2 = 10,

View File

@ -565,24 +565,11 @@ bool AMDGPUDAGToDAGISel::isRegionStore(const StoreSDNode *N) {
return checkType(N->getSrcValue(), AMDGPUAS::REGION_ADDRESS);
}
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int cbID) const {
if (checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS)) {
return true;
}
const DataLayout *DL = TM.getDataLayout();
MachineMemOperand *MMO = N->getMemOperand();
const Value *V = MMO->getValue();
const Value *BV = GetUnderlyingObject(V, DL, 0);
if (MMO
&& MMO->getValue()
&& ((V && dyn_cast<GlobalValue>(V))
|| (BV && dyn_cast<GlobalValue>(
GetUnderlyingObject(MMO->getValue(), DL, 0))))) {
return checkType(N->getSrcValue(), AMDGPUAS::PRIVATE_ADDRESS);
} else {
return false;
bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
if (CbId == -1) {
return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_ADDRESS);
}
return checkType(N->getSrcValue(), AMDGPUAS::CONSTANT_BUFFER_0 + CbId);
}
bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {

View File

@ -72,10 +72,10 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::i32, Custom);
setOperationAction(ISD::LOAD, MVT::v2i32, Expand);
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Expand);
@ -775,7 +775,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
unsigned DwordOffset) const {
unsigned ByteOffset = DwordOffset * 4;
PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
AMDGPUAS::CONSTANT_BUFFER_0);
// We shouldn't be using an offset wider than 16-bits for implicit parameters.
assert(isInt<16>(ByteOffset));
@ -1219,40 +1219,20 @@ SDValue R600TargetLowering::LowerFormalArguments(
AnalyzeFormalArguments(CCInfo, Ins);
Function::const_arg_iterator FuncArg =
DAG.getMachineFunction().getFunction()->arg_begin();
for (unsigned i = 0, e = Ins.size(); i < e; ++i, ++FuncArg) {
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT VT = VA.getLocVT();
Type *ArgType = FuncArg->getType();
unsigned ArgSizeInBits = ArgType->isPointerTy() ?
32 : ArgType->getPrimitiveSizeInBits();
unsigned ArgBytes = ArgSizeInBits >> 3;
EVT ArgVT;
if (ArgSizeInBits < VT.getSizeInBits()) {
assert(!ArgType->isFloatTy() &&
"Extending floating point arguments not supported yet");
ArgVT = MVT::getIntegerVT(ArgSizeInBits);
} else {
ArgVT = VT;
}
ISD::LoadExtType LoadType = ISD::EXTLOAD;
if (Ins[i].Flags.isZExt()) {
LoadType = ISD::ZEXTLOAD;
} else if (Ins[i].Flags.isSExt()) {
LoadType = ISD::SEXTLOAD;
}
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
AMDGPUAS::PARAM_I_ADDRESS);
AMDGPUAS::CONSTANT_BUFFER_0);
// The first 36 bytes of the input buffer contain information about
// thread group and global sizes.
SDValue Arg = DAG.getExtLoad(LoadType, DL, VT, DAG.getRoot(),
SDValue Arg = DAG.getLoad(VT, DL, Chain,
DAG.getConstant(36 + VA.getLocMemOffset(), MVT::i32),
MachinePointerInfo(UndefValue::get(PtrTy)),
ArgVT, false, false, ArgBytes);
MachinePointerInfo(UndefValue::get(PtrTy)), false,
false, false, 4); // 4 is the preferred alignment for
// the CONSTANT memory space.
InVals.push_back(Arg);
}
return Chain;

View File

@ -313,7 +313,7 @@ class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
class LoadParamFrag <PatFrag load_type> : PatFrag <
(ops node:$ptr), (load_type node:$ptr),
[{ return isParamLoad(dyn_cast<LoadSDNode>(N)); }]
[{ return isConstantLoad(dyn_cast<LoadSDNode>(N), 0); }]
>;
def load_param : LoadParamFrag<load>;

View File

@ -2,7 +2,10 @@
; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
; R600-CHECK: @v4i32_kernel_arg
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: @v4i32_kernel_arg
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @v4i32_kernel_arg(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
@ -12,7 +15,10 @@ entry:
}
; R600-CHECK: @v4f32_kernel_arg
; R600-CHECK: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 52
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR:[0-9]]].X, KC0[3].Y
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Y, KC0[3].Z
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].Z, KC0[3].W
; R600-CHECK-DAG: MOV {{[* ]*}}T[[GPR]].W, KC0[4].X
; SI-CHECK: @v4f32_kernel_arg
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @v4f32_kernel_args(<4 x float> addrspace(1)* %out, <4 x float> %in) {

View File

@ -2,8 +2,8 @@
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
;EG-CHECK: @test2
;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], literal\.[xyzw]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test2
;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}
@ -19,10 +19,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
}
;EG-CHECK: @test4
;EG-CHECK: ADD_INT T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG-CHECK: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI-CHECK: @test4
;SI-CHECK: V_ADD_I32_e32 VGPR{{[0-9]+, VGPR[0-9]+, VGPR[0-9]+}}

View File

@ -36,8 +36,8 @@ entry:
; SHA-256 Ma function
; ((x & z) | (y & (x | z)))
; R600-CHECK: @bfi_sha256_ma
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]],
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}

View File

@ -1,19 +1,17 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;CHECK: MUL_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}
;CHECK-DAG: MUL_IEEE * T{{[0-9]+\.[XYZW]}}
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x float> addrspace(1)* %in, i32 1
%a = load <4 x float> addrspace(1) * %in
%b = load <4 x float> addrspace(1) * %b_ptr
%result = fdiv <4 x float> %a, %b
store <4 x float> %result, <4 x float> addrspace(1)* %out
define void @test(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
entry:
%0 = fdiv <4 x float> %a, %b
store <4 x float> %0, <4 x float> addrspace(1)* %out
ret void
}

View File

@ -1,10 +1,10 @@
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
; CHECK: @fp_to_sint_v4i32
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
; CHECK: FLT_TO_INT {{[* ]*}}T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
define void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%value = load <4 x float> addrspace(1) * %in

View File

@ -1,6 +1,6 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
;CHECK: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define void @test(float addrspace(1)* %out, i8 addrspace(1)* %in) {
%1 = load i8 addrspace(1)* %in

View File

@ -2,12 +2,12 @@
; Test using an integer literal constant.
; Generated ASM should be:
; ADD_INT REG literal.x, 5
; ADD_INT KC0[2].Z literal.x, 5
; or
; ADD_INT literal.x REG, 5
; ADD_INT literal.x KC0[2].Z, 5
; CHECK: @i32_literal
; CHECK: ADD_INT * {{[A-Z0-9,. ]*}}literal.x
; CHECK: ADD_INT * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: 5
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -18,12 +18,12 @@ entry:
; Test using a float literal constant.
; Generated ASM should be:
; ADD REG literal.x, 5.0
; ADD KC0[2].Z literal.x, 5.0
; or
; ADD literal.x REG, 5.0
; ADD literal.x KC0[2].Z, 5.0
; CHECK: @float_literal
; CHECK: ADD * {{[A-Z0-9,. ]*}}literal.x
; CHECK: ADD * T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
; CHECK-NEXT: 1084227584(5.0
define void @float_literal(float addrspace(1)* %out, float %in) {
entry:

View File

@ -2,7 +2,7 @@
; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck --check-prefix=SI-CHECK %s
; R600-CHECK: @amdgpu_trunc
; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-CHECK: TRUNC * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI-CHECK: @amdgpu_trunc
; SI-CHECK: V_TRUNC_F32

View File

@ -3,8 +3,8 @@
; load a v2i32 value from the global address space.
; EG-CHECK: @load_v2i32
; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4
; EG-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4
; EG-CHECK-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI-CHECK: @load_v2i32
; SI-CHECK: BUFFER_LOAD_DWORDX2 VGPR{{[0-9]+}}
define void @load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {

View File

@ -1,12 +1,9 @@
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
;CHECK: TEX
;CHECK: ALU_PUSH
;CHECK: JUMP @15
;CHECK: TEX
;CHECK: LOOP_START_DX10 @14
;CHECK: LOOP_BREAK @13
;CHECK: POP @15
;CHECK: LOOP_START_DX10 @11
;CHECK: LOOP_BREAK @10
;CHECK: POP @10
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
target triple = "r600--"

View File

@ -19,7 +19,7 @@ entry:
; R600-CHECK: @rotl
; R600-CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
; R600-CHECK-NEXT: 32
; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[XYZW]}}
; R600-CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW]}}, KC0[2].Z, KC0[2].Z, PV.{{[XYZW]}}
; SI-CHECK: @rotl
; SI-CHECK: V_SUB_I32_e64 [[DST:VGPR[0-9]+]], 32, {{[SV]GPR[0-9]+}}

View File

@ -29,7 +29,6 @@ ENDIF:
; for the icmp instruction
; CHECK: @test_b
; CHECK: VTX_READ
; CHECK: SET{{[GTEQN]+}}_DX10
; CHECK-NEXT: PRED_
; CHECK-NEXT: ALU clause starting

View File

@ -5,7 +5,7 @@
; SET*DX10 instructions.
; CHECK: @fcmp_une_select_fptosi
; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -18,7 +18,7 @@ entry:
}
; CHECK: @fcmp_une_select_i32
; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETNE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -29,7 +29,7 @@ entry:
}
; CHECK: @fcmp_ueq_select_fptosi
; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -42,7 +42,7 @@ entry:
}
; CHECK: @fcmp_ueq_select_i32
; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -53,7 +53,7 @@ entry:
}
; CHECK: @fcmp_ugt_select_fptosi
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -66,7 +66,7 @@ entry:
}
; CHECK: @fcmp_ugt_select_i32
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -77,7 +77,7 @@ entry:
}
; CHECK: @fcmp_uge_select_fptosi
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -90,7 +90,7 @@ entry:
}
; CHECK: @fcmp_uge_select_i32
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, T{{[0-9]+\.[XYZW]}}, literal.x,
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -101,7 +101,7 @@ entry:
}
; CHECK: @fcmp_ule_select_fptosi
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -114,7 +114,7 @@ entry:
}
; CHECK: @fcmp_ule_select_i32
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
; CHECK: SETGE_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
entry:
@ -125,7 +125,7 @@ entry:
}
; CHECK: @fcmp_ult_select_fptosi
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
entry:
@ -138,7 +138,7 @@ entry:
}
; CHECK: @fcmp_ult_select_i32
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, T{{[0-9]+\.[XYZW]}},
; CHECK: SETGT_DX10 * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
entry:

View File

@ -2,7 +2,7 @@
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s
; CHECK: @i8_arg
; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
@ -12,7 +12,7 @@ entry:
}
; CHECK: @i8_zext_arg
; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
@ -22,7 +22,7 @@ entry:
}
; CHECK: @i8_sext_arg
; CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
%0 = sext i8 %in to i32
@ -31,7 +31,7 @@ entry:
}
; CHECK: @i16_arg
; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
@ -41,7 +41,7 @@ entry:
}
; CHECK: @i16_zext_arg
; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
@ -51,7 +51,7 @@ entry:
}
; CHECK: @i16_sext_arg
; CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; CHECK: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:

View File

@ -3,7 +3,7 @@
; These tests are for condition codes that are not supported by the hardware
; CHECK: @slt
; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @slt(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -14,7 +14,7 @@ entry:
}
; CHECK: @ult_i32
; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 5(7.006492e-45)
define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -25,7 +25,7 @@ entry:
}
; CHECK: @ult_float
; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ult_float(float addrspace(1)* %out, float %in) {
entry:
@ -36,7 +36,7 @@ entry:
}
; CHECK: @olt
; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
;CHECK-NEXT: 1084227584(5.000000e+00)
define void @olt(float addrspace(1)* %out, float %in) {
entry:
@ -47,7 +47,7 @@ entry:
}
; CHECK: @sle
; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT_INT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @sle(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -58,7 +58,7 @@ entry:
}
; CHECK: @ule_i32
; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGT_UINT * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 6(8.407791e-45)
define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
@ -69,7 +69,7 @@ entry:
}
; CHECK: @ule_float
; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT: 1084227584(5.000000e+00)
define void @ule_float(float addrspace(1)* %out, float %in) {
entry:
@ -80,7 +80,7 @@ entry:
}
; CHECK: @ole
; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, {{T[0-9]+\.[XYZW]}},
; CHECK: SETGE * T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
; CHECK-NEXT:1084227584(5.000000e+00)
define void @ole(float addrspace(1)* %out, float %in) {
entry:

View File

@ -6,17 +6,13 @@
; CHECK: @test
; CHECK: Fetch clause
; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 40
; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 44
; CHECK: Fetch clause
; CHECK_VTX_READ_32 [[IN0:T[0-9]+\.X]], [[IN0]], 0
; CHECK: Fetch clause
; CHECK_VTX_READ_32 [[IN1:T[0-9]+\.X]], [[IN1]], 0
define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in0, i32 addrspace(1)* nocapture %in1) {
define void @test(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* addrspace(1)* nocapture %in0) {
entry:
%0 = load i32 addrspace(1)* %in0, align 4
%1 = load i32 addrspace(1)* %in1, align 4
%cmp.i = icmp slt i32 %0, %1
%cond.i = select i1 %cmp.i, i32 %0, i32 %1
store i32 %cond.i, i32 addrspace(1)* %out, align 4
%0 = load i32 addrspace(1)* addrspace(1)* %in0
%1 = load i32 addrspace(1)* %0
store i32 %1, i32 addrspace(1)* %out
ret void
}

View File

@ -3,7 +3,7 @@
; R600-CHECK: @ngroups_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 0
; R600-CHECK: MOV * [[VAL]], KC0[0].X
; SI-CHECK: @ngroups_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 0
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -17,7 +17,7 @@ entry:
; R600-CHECK: @ngroups_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 4
; R600-CHECK: MOV * [[VAL]], KC0[0].Y
; SI-CHECK: @ngroups_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 1
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -31,7 +31,7 @@ entry:
; R600-CHECK: @ngroups_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 8
; R600-CHECK: MOV * [[VAL]], KC0[0].Z
; SI-CHECK: @ngroups_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 2
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -45,7 +45,7 @@ entry:
; R600-CHECK: @global_size_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 12
; R600-CHECK: MOV * [[VAL]], KC0[0].W
; SI-CHECK: @global_size_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 3
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -59,7 +59,7 @@ entry:
; R600-CHECK: @global_size_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 16
; R600-CHECK: MOV * [[VAL]], KC0[1].X
; SI-CHECK: @global_size_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 4
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -73,7 +73,7 @@ entry:
; R600-CHECK: @global_size_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 20
; R600-CHECK: MOV * [[VAL]], KC0[1].Y
; SI-CHECK: @global_size_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 5
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -87,7 +87,7 @@ entry:
; R600-CHECK: @local_size_x
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 24
; R600-CHECK: MOV * [[VAL]], KC0[1].Z
; SI-CHECK: @local_size_x
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 6
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -101,7 +101,7 @@ entry:
; R600-CHECK: @local_size_y
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 28
; R600-CHECK: MOV * [[VAL]], KC0[1].W
; SI-CHECK: @local_size_y
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 7
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]
@ -115,7 +115,7 @@ entry:
; R600-CHECK: @local_size_z
; R600-CHECK: RAT_WRITE_CACHELESS_32_eg [[VAL:T[0-9]+\.X]]
; R600-CHECK: VTX_READ_32 [[VAL]], [[VAL]], 32
; R600-CHECK: MOV * [[VAL]], KC0[2].X
; SI-CHECK: @local_size_z
; SI-CHECK: S_LOAD_DWORD [[VAL:SGPR[0-9]+]], SGPR0_SGPR1, 8
; SI-CHECK: V_MOV_B32_e32 [[VVAL:VGPR[0-9]+]], [[VAL]]