mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-23 12:40:17 +00:00
R600: Use function inputs to represent data stored in gpr
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194425 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
6c7a7c6474
commit
70a7d5ddb4
@ -42,6 +42,17 @@ def CC_SI : CallingConv<[
|
||||
|
||||
]>;
|
||||
|
||||
// Calling convention for R600
|
||||
def CC_R600 : CallingConv<[
|
||||
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
|
||||
T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
|
||||
T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
|
||||
T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
|
||||
T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
|
||||
T30_XYZW, T31_XYZW, T32_XYZW
|
||||
]>>>
|
||||
]>;
|
||||
|
||||
// Calling convention for compute kernels
|
||||
def CC_AMDGPU_Kernel : CallingConv<[
|
||||
CCCustom<"allocateStack">
|
||||
@ -57,5 +68,7 @@ def CC_AMDGPU : CallingConv<[
|
||||
"State.getMachineFunction().getInfo<R600MachineFunctionInfo>()->"
|
||||
"ShaderType == ShaderType::COMPUTE", CCDelegateTo<CC_AMDGPU_Kernel>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>
|
||||
".getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_SI>>,
|
||||
CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>()"#
|
||||
".getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS", CCDelegateTo<CC_R600>>
|
||||
]>;
|
||||
|
@ -554,51 +554,23 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
|
||||
SDLoc DL(Op);
|
||||
switch(IntrinsicID) {
|
||||
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
case AMDGPUIntrinsic::R600_load_input: {
|
||||
int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
MRI.addLiveIn(Reg);
|
||||
return DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), Reg, VT);
|
||||
}
|
||||
|
||||
case AMDGPUIntrinsic::R600_interp_input: {
|
||||
case AMDGPUIntrinsic::R600_interp_xy:
|
||||
case AMDGPUIntrinsic::R600_interp_zw: {
|
||||
int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
|
||||
MachineSDNode *interp;
|
||||
if (ijb < 0) {
|
||||
const MachineFunction &MF = DAG.getMachineFunction();
|
||||
const R600InstrInfo *TII =
|
||||
static_cast<const R600InstrInfo*>(MF.getTarget().getInstrInfo());
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
|
||||
MVT::v4f32, DAG.getTargetConstant(slot / 4 , MVT::i32));
|
||||
return DAG.getTargetExtractSubreg(
|
||||
TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
|
||||
DL, MVT::f32, SDValue(interp, 0));
|
||||
}
|
||||
SDValue RegisterINode = Op.getOperand(2);
|
||||
SDValue RegisterJNode = Op.getOperand(3);
|
||||
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
|
||||
unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
|
||||
MRI.addLiveIn(RegisterI);
|
||||
MRI.addLiveIn(RegisterJ);
|
||||
SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
|
||||
SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
|
||||
SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);
|
||||
|
||||
if (slot % 4 < 2)
|
||||
if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
|
||||
RegisterJNode, RegisterINode);
|
||||
else
|
||||
interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4 , MVT::i32),
|
||||
MVT::f32, MVT::f32, DAG.getTargetConstant(slot, MVT::i32),
|
||||
RegisterJNode, RegisterINode);
|
||||
return SDValue(interp, slot % 2);
|
||||
return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
|
||||
SDValue(interp, 0), SDValue(interp, 1));
|
||||
}
|
||||
case AMDGPUIntrinsic::R600_tex:
|
||||
case AMDGPUIntrinsic::R600_texc:
|
||||
@ -1339,6 +1311,8 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
SmallVector<CCValAssign, 16> ArgLocs;
|
||||
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
|
||||
getTargetMachine(), ArgLocs, *DAG.getContext());
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->ShaderType;
|
||||
|
||||
SmallVector<ISD::InputArg, 8> LocalIns;
|
||||
|
||||
@ -1352,6 +1326,13 @@ SDValue R600TargetLowering::LowerFormalArguments(
|
||||
EVT VT = Ins[i].VT;
|
||||
EVT MemVT = LocalIns[i].VT;
|
||||
|
||||
if (ShaderType != ShaderType::COMPUTE) {
|
||||
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
|
||||
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
|
||||
InVals.push_back(Register);
|
||||
continue;
|
||||
}
|
||||
|
||||
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
||||
AMDGPUAS::CONSTANT_BUFFER_0);
|
||||
|
||||
|
@ -418,7 +418,7 @@ def INTERP_VEC_LOAD : AMDGPUShaderInst <
|
||||
(outs R600_Reg128:$dst),
|
||||
(ins i32imm:$src0),
|
||||
"INTERP_LOAD $src0 : $dst",
|
||||
[]>;
|
||||
[(set R600_Reg128:$dst, (int_R600_interp_const imm:$src0))]>;
|
||||
|
||||
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
|
||||
let bank_swizzle = 5;
|
||||
|
@ -39,10 +39,14 @@ let TargetPrefix = "R600", isTarget = 1 in {
|
||||
llvm_i32_ty // coord_type_w
|
||||
], [IntrNoMem]>;
|
||||
|
||||
def int_R600_load_input :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_interp_input :
|
||||
Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_interp_const :
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_interp_xy :
|
||||
Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_R600_interp_zw :
|
||||
Intrinsic<[llvm_v2f32_ty], [llvm_i32_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
|
||||
def int_R600_load_texbuf :
|
||||
Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
|
||||
def int_R600_tex : TextureIntrinsicFloatInput;
|
||||
|
@ -4,54 +4,54 @@
|
||||
;This test ensures that R600 backend can handle ifcvt properly
|
||||
;and do not generate ALU clauses with more than 128 instructions.
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7, <4 x float> inreg %reg8, <4 x float> inreg %reg9) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 0)
|
||||
%1 = call float @llvm.R600.load.input(i32 1)
|
||||
%2 = call float @llvm.R600.load.input(i32 2)
|
||||
%3 = call float @llvm.R600.load.input(i32 3)
|
||||
%4 = call float @llvm.R600.load.input(i32 4)
|
||||
%5 = call float @llvm.R600.load.input(i32 36)
|
||||
%6 = call float @llvm.R600.load.input(i32 32)
|
||||
%0 = extractelement <4 x float> %reg0, i32 0
|
||||
%1 = extractelement <4 x float> %reg0, i32 1
|
||||
%2 = extractelement <4 x float> %reg0, i32 2
|
||||
%3 = extractelement <4 x float> %reg0, i32 3
|
||||
%4 = extractelement <4 x float> %reg1, i32 0
|
||||
%5 = extractelement <4 x float> %reg9, i32 0
|
||||
%6 = extractelement <4 x float> %reg8, i32 0
|
||||
%7 = fcmp ugt float %6, 0.000000e+00
|
||||
%8 = select i1 %7, float %4, float %5
|
||||
%9 = call float @llvm.R600.load.input(i32 5)
|
||||
%10 = call float @llvm.R600.load.input(i32 37)
|
||||
%11 = call float @llvm.R600.load.input(i32 32)
|
||||
%9 = extractelement <4 x float> %reg1, i32 1
|
||||
%10 = extractelement <4 x float> %reg9, i32 1
|
||||
%11 = extractelement <4 x float> %reg8, i32 0
|
||||
%12 = fcmp ugt float %11, 0.000000e+00
|
||||
%13 = select i1 %12, float %9, float %10
|
||||
%14 = call float @llvm.R600.load.input(i32 6)
|
||||
%15 = call float @llvm.R600.load.input(i32 38)
|
||||
%16 = call float @llvm.R600.load.input(i32 32)
|
||||
%14 = extractelement <4 x float> %reg1, i32 2
|
||||
%15 = extractelement <4 x float> %reg9, i32 2
|
||||
%16 = extractelement <4 x float> %reg8, i32 0
|
||||
%17 = fcmp ugt float %16, 0.000000e+00
|
||||
%18 = select i1 %17, float %14, float %15
|
||||
%19 = call float @llvm.R600.load.input(i32 7)
|
||||
%20 = call float @llvm.R600.load.input(i32 39)
|
||||
%21 = call float @llvm.R600.load.input(i32 32)
|
||||
%22 = call float @llvm.R600.load.input(i32 8)
|
||||
%23 = call float @llvm.R600.load.input(i32 9)
|
||||
%24 = call float @llvm.R600.load.input(i32 10)
|
||||
%25 = call float @llvm.R600.load.input(i32 11)
|
||||
%26 = call float @llvm.R600.load.input(i32 12)
|
||||
%27 = call float @llvm.R600.load.input(i32 13)
|
||||
%28 = call float @llvm.R600.load.input(i32 14)
|
||||
%29 = call float @llvm.R600.load.input(i32 15)
|
||||
%30 = call float @llvm.R600.load.input(i32 16)
|
||||
%31 = call float @llvm.R600.load.input(i32 17)
|
||||
%32 = call float @llvm.R600.load.input(i32 18)
|
||||
%33 = call float @llvm.R600.load.input(i32 19)
|
||||
%34 = call float @llvm.R600.load.input(i32 20)
|
||||
%35 = call float @llvm.R600.load.input(i32 21)
|
||||
%36 = call float @llvm.R600.load.input(i32 22)
|
||||
%37 = call float @llvm.R600.load.input(i32 23)
|
||||
%38 = call float @llvm.R600.load.input(i32 24)
|
||||
%39 = call float @llvm.R600.load.input(i32 25)
|
||||
%40 = call float @llvm.R600.load.input(i32 26)
|
||||
%41 = call float @llvm.R600.load.input(i32 27)
|
||||
%42 = call float @llvm.R600.load.input(i32 28)
|
||||
%43 = call float @llvm.R600.load.input(i32 29)
|
||||
%44 = call float @llvm.R600.load.input(i32 30)
|
||||
%45 = call float @llvm.R600.load.input(i32 31)
|
||||
%19 = extractelement <4 x float> %reg1, i32 3
|
||||
%20 = extractelement <4 x float> %reg9, i32 3
|
||||
%21 = extractelement <4 x float> %reg8, i32 0
|
||||
%22 = extractelement <4 x float> %reg2, i32 0
|
||||
%23 = extractelement <4 x float> %reg2, i32 1
|
||||
%24 = extractelement <4 x float> %reg2, i32 2
|
||||
%25 = extractelement <4 x float> %reg2, i32 3
|
||||
%26 = extractelement <4 x float> %reg3, i32 0
|
||||
%27 = extractelement <4 x float> %reg3, i32 1
|
||||
%28 = extractelement <4 x float> %reg3, i32 2
|
||||
%29 = extractelement <4 x float> %reg3, i32 3
|
||||
%30 = extractelement <4 x float> %reg4, i32 0
|
||||
%31 = extractelement <4 x float> %reg4, i32 1
|
||||
%32 = extractelement <4 x float> %reg4, i32 2
|
||||
%33 = extractelement <4 x float> %reg4, i32 3
|
||||
%34 = extractelement <4 x float> %reg5, i32 0
|
||||
%35 = extractelement <4 x float> %reg5, i32 1
|
||||
%36 = extractelement <4 x float> %reg5, i32 2
|
||||
%37 = extractelement <4 x float> %reg5, i32 3
|
||||
%38 = extractelement <4 x float> %reg6, i32 0
|
||||
%39 = extractelement <4 x float> %reg6, i32 1
|
||||
%40 = extractelement <4 x float> %reg6, i32 2
|
||||
%41 = extractelement <4 x float> %reg6, i32 3
|
||||
%42 = extractelement <4 x float> %reg7, i32 0
|
||||
%43 = extractelement <4 x float> %reg7, i32 1
|
||||
%44 = extractelement <4 x float> %reg7, i32 2
|
||||
%45 = extractelement <4 x float> %reg7, i32 3
|
||||
%46 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
|
||||
%47 = extractelement <4 x float> %46, i32 0
|
||||
%48 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
|
||||
@ -1146,9 +1146,6 @@ ENDIF178: ; preds = %ENDIF175, %IF179
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
|
@ -2,9 +2,9 @@
|
||||
|
||||
; CHECK: @main
|
||||
; CHECK-NOT: MOV
|
||||
define void @main() {
|
||||
define void @main(<4 x float> inreg %reg0) #0 {
|
||||
entry:
|
||||
%0 = call float @llvm.R600.load.input(i32 0)
|
||||
%0 = extractelement <4 x float> %reg0, i32 0
|
||||
%1 = call float @fabs(float %0)
|
||||
%2 = fptoui float %1 to i32
|
||||
%3 = bitcast i32 %2 to float
|
||||
@ -13,6 +13,7 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare float @fabs(float ) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -2,15 +2,15 @@
|
||||
|
||||
;CHECK: FLOOR * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = call float @floor(float %r0)
|
||||
call void @llvm.AMDGPU.store.output(float %r1, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r1, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @floor(float) readonly
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -2,18 +2,18 @@
|
||||
|
||||
;CHECK: MULADD_IEEE * {{T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
%r2 = call float @llvm.R600.load.input(i32 2)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = extractelement <4 x float> %reg0, i32 2
|
||||
%r3 = fmul float %r0, %r1
|
||||
%r4 = fadd float %r3, %r2
|
||||
call void @llvm.AMDGPU.store.output(float %r4, i32 0)
|
||||
%r4 = fadd float %r3, %r2
|
||||
%vec = insertelement <4 x float> undef, float %r4, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @fabs(float ) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -2,15 +2,16 @@
|
||||
|
||||
;CHECK: MAX * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = fcmp oge float %r0, %r1
|
||||
%r3 = select i1 %r2, float %r0, float %r1
|
||||
call void @llvm.AMDGPU.store.output(float %r3, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r3, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -2,15 +2,16 @@
|
||||
|
||||
;CHECK: MIN * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = fcmp uge float %r0, %r1
|
||||
%r3 = select i1 %r2, float %r1, float %r0
|
||||
call void @llvm.AMDGPU.store.output(float %r3, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r3, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -2,16 +2,16 @@
|
||||
|
||||
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = call float @llvm.AMDGPU.mul( float %r0, float %r1)
|
||||
call void @llvm.AMDGPU.store.output(float %r2, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r2, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @llvm.AMDGPU.mul(float ,float ) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
@ -5,15 +5,15 @@
|
||||
;CHECK: ADD *
|
||||
;CHECK: COS * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = call float @llvm.cos.f32(float %r0)
|
||||
call void @llvm.AMDGPU.store.output(float %r1, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r1, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.cos.f32(float) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
|
@ -4,16 +4,16 @@
|
||||
;CHECK: MUL NON-IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PS}}
|
||||
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
%r1 = call float @llvm.R600.load.input(i32 1)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = call float @llvm.pow.f32( float %r0, float %r1)
|
||||
call void @llvm.AMDGPU.store.output(float %r2, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r2, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @llvm.pow.f32(float ,float ) readonly
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
|
@ -5,15 +5,15 @@
|
||||
;CHECK: ADD *
|
||||
;CHECK: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = call float @llvm.sin.f32( float %r0)
|
||||
call void @llvm.AMDGPU.store.output(float %r1, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r1, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.sin.f32(float) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
|
@ -1,20 +1,20 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=cayman
|
||||
;REQUIRES: asserts
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%4 = call float @llvm.R600.load.input(i32 8)
|
||||
%5 = call float @llvm.R600.load.input(i32 9)
|
||||
%6 = call float @llvm.R600.load.input(i32 10)
|
||||
%7 = call float @llvm.R600.load.input(i32 11)
|
||||
%8 = call float @llvm.R600.load.input(i32 12)
|
||||
%9 = call float @llvm.R600.load.input(i32 13)
|
||||
%10 = call float @llvm.R600.load.input(i32 14)
|
||||
%11 = call float @llvm.R600.load.input(i32 15)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = extractelement <4 x float> %reg2, i32 0
|
||||
%5 = extractelement <4 x float> %reg2, i32 1
|
||||
%6 = extractelement <4 x float> %reg2, i32 2
|
||||
%7 = extractelement <4 x float> %reg2, i32 3
|
||||
%8 = extractelement <4 x float> %reg3, i32 0
|
||||
%9 = extractelement <4 x float> %reg3, i32 1
|
||||
%10 = extractelement <4 x float> %reg3, i32 2
|
||||
%11 = extractelement <4 x float> %reg3, i32 3
|
||||
%12 = load <4 x float> addrspace(8)* null
|
||||
%13 = extractelement <4 x float> %12, i32 0
|
||||
%14 = fmul float %0, %13
|
||||
@ -95,9 +95,6 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
|
@ -3,13 +3,13 @@
|
||||
; CHECK: @main
|
||||
; CHECK: ADD *
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%4 = call float @llvm.R600.load.input(i32 8)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = extractelement <4 x float> %reg2, i32 0
|
||||
%5 = fadd float %0, 2.0
|
||||
%6 = fadd float %1, 3.0
|
||||
%7 = fadd float %2, 4.0
|
||||
@ -32,13 +32,13 @@ main_body:
|
||||
; CHECK: @main
|
||||
; CHECK-NOT: ADD *
|
||||
|
||||
define void @main2() #0 {
|
||||
define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%4 = call float @llvm.R600.load.input(i32 8)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = extractelement <4 x float> %reg2, i32 0
|
||||
%5 = fadd float %0, 2.0
|
||||
%6 = fadd float %1, 3.0
|
||||
%7 = fadd float %2, 4.0
|
||||
@ -59,7 +59,6 @@ main_body:
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
@ -3,17 +3,17 @@
|
||||
;CHECK: DOT4 T{{[0-9]\.X}}
|
||||
;CHECK: MULADD_IEEE * T{{[0-9]\.W}}
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 8)
|
||||
%4 = call float @llvm.R600.load.input(i32 9)
|
||||
%5 = call float @llvm.R600.load.input(i32 10)
|
||||
%6 = call float @llvm.R600.load.input(i32 12)
|
||||
%7 = call float @llvm.R600.load.input(i32 13)
|
||||
%8 = call float @llvm.R600.load.input(i32 14)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg2, i32 0
|
||||
%4 = extractelement <4 x float> %reg2, i32 1
|
||||
%5 = extractelement <4 x float> %reg2, i32 2
|
||||
%6 = extractelement <4 x float> %reg3, i32 0
|
||||
%7 = extractelement <4 x float> %reg3, i32 1
|
||||
%8 = extractelement <4 x float> %reg3, i32 2
|
||||
%9 = load <4 x float> addrspace(8)* null
|
||||
%10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%11 = call float @llvm.AMDGPU.dp4(<4 x float> %9, <4 x float> %9)
|
||||
@ -35,9 +35,6 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
@ -46,5 +43,3 @@ declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #2 = { readonly }
|
||||
attributes #3 = { nounwind readonly }
|
||||
|
@ -3,36 +3,36 @@
|
||||
;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
|
||||
;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2, <4 x float> inreg %reg3, <4 x float> inreg %reg4, <4 x float> inreg %reg5, <4 x float> inreg %reg6, <4 x float> inreg %reg7) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%4 = call float @llvm.R600.load.input(i32 8)
|
||||
%5 = call float @llvm.R600.load.input(i32 9)
|
||||
%6 = call float @llvm.R600.load.input(i32 10)
|
||||
%7 = call float @llvm.R600.load.input(i32 11)
|
||||
%8 = call float @llvm.R600.load.input(i32 12)
|
||||
%9 = call float @llvm.R600.load.input(i32 13)
|
||||
%10 = call float @llvm.R600.load.input(i32 14)
|
||||
%11 = call float @llvm.R600.load.input(i32 15)
|
||||
%12 = call float @llvm.R600.load.input(i32 16)
|
||||
%13 = call float @llvm.R600.load.input(i32 17)
|
||||
%14 = call float @llvm.R600.load.input(i32 18)
|
||||
%15 = call float @llvm.R600.load.input(i32 19)
|
||||
%16 = call float @llvm.R600.load.input(i32 20)
|
||||
%17 = call float @llvm.R600.load.input(i32 21)
|
||||
%18 = call float @llvm.R600.load.input(i32 22)
|
||||
%19 = call float @llvm.R600.load.input(i32 23)
|
||||
%20 = call float @llvm.R600.load.input(i32 24)
|
||||
%21 = call float @llvm.R600.load.input(i32 25)
|
||||
%22 = call float @llvm.R600.load.input(i32 26)
|
||||
%23 = call float @llvm.R600.load.input(i32 27)
|
||||
%24 = call float @llvm.R600.load.input(i32 28)
|
||||
%25 = call float @llvm.R600.load.input(i32 29)
|
||||
%26 = call float @llvm.R600.load.input(i32 30)
|
||||
%27 = call float @llvm.R600.load.input(i32 31)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = extractelement <4 x float> %reg2, i32 0
|
||||
%5 = extractelement <4 x float> %reg2, i32 1
|
||||
%6 = extractelement <4 x float> %reg2, i32 2
|
||||
%7 = extractelement <4 x float> %reg2, i32 3
|
||||
%8 = extractelement <4 x float> %reg3, i32 0
|
||||
%9 = extractelement <4 x float> %reg3, i32 1
|
||||
%10 = extractelement <4 x float> %reg3, i32 2
|
||||
%11 = extractelement <4 x float> %reg3, i32 3
|
||||
%12 = extractelement <4 x float> %reg4, i32 0
|
||||
%13 = extractelement <4 x float> %reg4, i32 1
|
||||
%14 = extractelement <4 x float> %reg4, i32 2
|
||||
%15 = extractelement <4 x float> %reg4, i32 3
|
||||
%16 = extractelement <4 x float> %reg5, i32 0
|
||||
%17 = extractelement <4 x float> %reg5, i32 1
|
||||
%18 = extractelement <4 x float> %reg5, i32 2
|
||||
%19 = extractelement <4 x float> %reg5, i32 3
|
||||
%20 = extractelement <4 x float> %reg6, i32 0
|
||||
%21 = extractelement <4 x float> %reg6, i32 1
|
||||
%22 = extractelement <4 x float> %reg6, i32 2
|
||||
%23 = extractelement <4 x float> %reg6, i32 3
|
||||
%24 = extractelement <4 x float> %reg7, i32 0
|
||||
%25 = extractelement <4 x float> %reg7, i32 1
|
||||
%26 = extractelement <4 x float> %reg7, i32 2
|
||||
%27 = extractelement <4 x float> %reg7, i32 3
|
||||
%28 = load <4 x float> addrspace(8)* null
|
||||
%29 = extractelement <4 x float> %28, i32 0
|
||||
%30 = fmul float %0, %29
|
||||
@ -218,9 +218,6 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
|
@ -10,15 +10,16 @@
|
||||
; R600-CHECK: @test
|
||||
; R600-CHECK: MUL_IEEE {{[ *TXYZWPVxyzw.,0-9]+}} ; encoding: [{{0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x[0-9a-f]+,0x10,0x02,0x[0-9a-f]+,0x[0-9a-f]+}}]
|
||||
|
||||
define void @test() {
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
entry:
|
||||
%0 = call float @llvm.R600.load.input(i32 0)
|
||||
%1 = call float @llvm.R600.load.input(i32 1)
|
||||
%2 = fmul float %0, %1
|
||||
call void @llvm.AMDGPU.store.output(float %2, i32 0)
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = extractelement <4 x float> %reg0, i32 1
|
||||
%r2 = fmul float %r0, %r1
|
||||
%vec = insertelement <4 x float> undef, float %r2, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
|
@ -10,12 +10,12 @@
|
||||
;CHECK: EXPORT T{{[0-9]}}.0000
|
||||
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
%6 = fmul float %5, %0
|
||||
@ -137,10 +137,6 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
|
@ -1,12 +1,12 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=redwood
|
||||
;REQUIRES: asserts
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = bitcast float %0 to i32
|
||||
%5 = icmp eq i32 %4, 0
|
||||
%6 = sext i1 %5 to i32
|
||||
@ -113,12 +113,8 @@ ENDIF48: ; preds = %LOOP47
|
||||
br label %LOOP47
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
|
@ -2,15 +2,14 @@
|
||||
|
||||
;CHECK: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
|
||||
|
||||
define void @test() {
|
||||
%r0 = call float @llvm.R600.load.input(i32 0)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%r0 = extractelement <4 x float> %reg0, i32 0
|
||||
%r1 = fdiv float 1.0, %r0
|
||||
call void @llvm.AMDGPU.store.output(float %r1, i32 0)
|
||||
%vec = insertelement <4 x float> undef, float %r1, i32 0
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
declare void @llvm.AMDGPU.store.output(float, i32)
|
||||
|
||||
declare float @llvm.AMDGPU.rcp(float ) readnone
|
||||
attributes #0 = { "ShaderType"="0" }
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
|
||||
|
||||
; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
|
||||
; CHECK: TEX 9 @6 ; encoding: [0x06,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
|
||||
|
||||
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%1 = call float @llvm.R600.load.input(i32 4)
|
||||
%2 = call float @llvm.R600.load.input(i32 5)
|
||||
%3 = call float @llvm.R600.load.input(i32 6)
|
||||
%4 = call float @llvm.R600.load.input(i32 7)
|
||||
define void @test(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
%1 = extractelement <4 x float> %reg1, i32 0
|
||||
%2 = extractelement <4 x float> %reg1, i32 1
|
||||
%3 = extractelement <4 x float> %reg1, i32 2
|
||||
%4 = extractelement <4 x float> %reg1, i32 3
|
||||
%5 = insertelement <4 x float> undef, float %1, i32 0
|
||||
%6 = insertelement <4 x float> %5, float %2, i32 1
|
||||
%7 = insertelement <4 x float> %6, float %3, i32 2
|
||||
@ -36,9 +36,6 @@ define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in)
|
||||
|
||||
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
attributes #1 = { readnone }
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
|
@ -1,12 +1,12 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched -verify-machineinstrs
|
||||
;REQUIRES: asserts
|
||||
|
||||
define void @main() {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #1 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.interp.input(i32 0, i32 0)
|
||||
%1 = call float @llvm.R600.interp.input(i32 1, i32 0)
|
||||
%2 = call float @llvm.R600.interp.input(i32 2, i32 0)
|
||||
%3 = call float @llvm.R600.interp.input(i32 3, i32 0)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = fcmp ult float %1, 0.000000e+00
|
||||
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
|
||||
%6 = fsub float -0.000000e+00, %5
|
||||
@ -74,10 +74,9 @@ ELSE17: ; preds = %ELSE
|
||||
br label %ENDIF
|
||||
}
|
||||
|
||||
declare float @llvm.R600.interp.input(i32, i32) #0
|
||||
|
||||
declare float @llvm.AMDIL.clamp.(float, float, float) #0
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { readnone }
|
||||
attributes #1 = { "ShaderType"="1" }
|
||||
|
@ -1,12 +1,12 @@
|
||||
;RUN: llc < %s -march=r600 -mcpu=cayman -stress-sched -verify-misched
|
||||
;REQUIRES: asserts
|
||||
|
||||
define void @main() {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = fcmp ult float %0, 0.000000e+00
|
||||
%5 = select i1 %4, float 1.000000e+00, float 0.000000e+00
|
||||
%6 = fsub float -0.000000e+00, %5
|
||||
@ -127,8 +127,6 @@ ENDIF19: ; preds = %ENDIF16
|
||||
br label %LOOP
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) #0
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { readnone }
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
|
@ -4,10 +4,10 @@
|
||||
; CHECK: MULADD_IEEE *
|
||||
; CHECK-NOT: MULADD_IEEE *
|
||||
|
||||
define void @main() {
|
||||
%w0 = call float @llvm.R600.load.input(i32 3)
|
||||
%w1 = call float @llvm.R600.load.input(i32 7)
|
||||
%w2 = call float @llvm.R600.load.input(i32 11)
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4 x float> inreg %reg2) #0 {
|
||||
%w0 = extractelement <4 x float> %reg0, i32 3
|
||||
%w1 = extractelement <4 x float> %reg1, i32 3
|
||||
%w2 = extractelement <4 x float> %reg2, i32 3
|
||||
%sq0 = fmul float %w0, %w0
|
||||
%r0 = fadd float %sq0, 2.0
|
||||
%sq1 = fmul float %w1, %w1
|
||||
@ -23,16 +23,10 @@ define void @main() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
|
||||
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #2 = { readonly }
|
||||
attributes #3 = { nounwind readonly }
|
||||
attributes #1 = { readnone }
|
@ -6,12 +6,12 @@
|
||||
;EG-CHECK: EXPORT T{{[0-9]+}}.XXWX
|
||||
;EG-CHECK: EXPORT T{{[0-9]+}}.XXXW
|
||||
|
||||
define void @main() #0 {
|
||||
define void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = extractelement <4 x float> %reg1, i32 2
|
||||
%3 = extractelement <4 x float> %reg1, i32 3
|
||||
%4 = load <4 x float> addrspace(8)* null
|
||||
%5 = extractelement <4 x float> %4, i32 1
|
||||
%6 = load <4 x float> addrspace(8)* null
|
||||
@ -96,12 +96,12 @@ main_body:
|
||||
; EG-CHECK: T{{[0-9]+}}.XY__
|
||||
; EG-CHECK: T{{[0-9]+}}.YXZ0
|
||||
|
||||
define void @main2() #0 {
|
||||
define void @main2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) #0 {
|
||||
main_body:
|
||||
%0 = call float @llvm.R600.load.input(i32 4)
|
||||
%1 = call float @llvm.R600.load.input(i32 5)
|
||||
%2 = call float @llvm.R600.load.input(i32 6)
|
||||
%3 = call float @llvm.R600.load.input(i32 7)
|
||||
%0 = extractelement <4 x float> %reg1, i32 0
|
||||
%1 = extractelement <4 x float> %reg1, i32 1
|
||||
%2 = fadd float %0, 2.5
|
||||
%3 = fmul float %1, 3.5
|
||||
%4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
|
||||
%5 = extractelement <4 x float> %4, i32 0
|
||||
%6 = call float @llvm.cos.f32(float %5)
|
||||
@ -109,8 +109,8 @@ main_body:
|
||||
%8 = extractelement <4 x float> %7, i32 0
|
||||
%9 = load <4 x float> addrspace(8)* null
|
||||
%10 = extractelement <4 x float> %9, i32 1
|
||||
%11 = insertelement <4 x float> undef, float %0, i32 0
|
||||
%12 = insertelement <4 x float> %11, float %1, i32 1
|
||||
%11 = insertelement <4 x float> undef, float %2, i32 0
|
||||
%12 = insertelement <4 x float> %11, float %3, i32 1
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
|
||||
%13 = insertelement <4 x float> undef, float %6, i32 0
|
||||
%14 = insertelement <4 x float> %13, float %8, i32 1
|
||||
@ -120,14 +120,10 @@ main_body:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
; Function Attrs: nounwind readonly
|
||||
declare float @llvm.cos.f32(float) #2
|
||||
declare float @llvm.cos.f32(float) #1
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
||||
attributes #1 = { readnone }
|
||||
attributes #2 = { nounwind readonly }
|
||||
attributes #1 = { nounwind readonly }
|
||||
|
@ -3,11 +3,11 @@
|
||||
;CHECK: TEX
|
||||
;CHECK-NEXT: ALU
|
||||
|
||||
define void @test() {
|
||||
%1 = call float @llvm.R600.load.input(i32 0)
|
||||
%2 = call float @llvm.R600.load.input(i32 1)
|
||||
%3 = call float @llvm.R600.load.input(i32 2)
|
||||
%4 = call float @llvm.R600.load.input(i32 3)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%1 = extractelement <4 x float> %reg0, i32 0
|
||||
%2 = extractelement <4 x float> %reg0, i32 1
|
||||
%3 = extractelement <4 x float> %reg0, i32 2
|
||||
%4 = extractelement <4 x float> %reg0, i32 3
|
||||
%5 = insertelement <4 x float> undef, float %1, i32 0
|
||||
%6 = insertelement <4 x float> %5, float %2, i32 1
|
||||
%7 = insertelement <4 x float> %6, float %3, i32 2
|
||||
@ -19,6 +19,7 @@ define void @test() {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
@ -2,11 +2,11 @@
|
||||
|
||||
;CHECK-NOT: MOV
|
||||
|
||||
define void @test() {
|
||||
%1 = call float @llvm.R600.load.input(i32 0)
|
||||
%2 = call float @llvm.R600.load.input(i32 1)
|
||||
%3 = call float @llvm.R600.load.input(i32 2)
|
||||
%4 = call float @llvm.R600.load.input(i32 3)
|
||||
define void @test(<4 x float> inreg %reg0) #0 {
|
||||
%1 = extractelement <4 x float> %reg0, i32 0
|
||||
%2 = extractelement <4 x float> %reg0, i32 1
|
||||
%3 = extractelement <4 x float> %reg0, i32 2
|
||||
%4 = extractelement <4 x float> %reg0, i32 3
|
||||
%5 = fmul float %1, 3.0
|
||||
%6 = fmul float %2, 3.0
|
||||
%7 = fmul float %3, 3.0
|
||||
@ -25,6 +25,7 @@ define void @test() {
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.R600.load.input(i32) readnone
|
||||
declare <4 x float> @llvm.R600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
|
||||
attributes #0 = { "ShaderType"="1" }
|
Loading…
Reference in New Issue
Block a user