AMDGPU: Fix TargetPrefix for remaining r600 intrinsics

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@275619 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matt Arsenault 2016-07-15 21:27:08 +00:00
parent a47e87a336
commit 5fecfa22e5
39 changed files with 146 additions and 144 deletions

View File

@ -641,7 +641,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
unsigned IntrinsicID =
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
case AMDGPUIntrinsic::R600_store_swizzle: {
case AMDGPUIntrinsic::r600_store_swizzle: {
SDLoc DL(Op);
const SDValue Args[8] = {
Chain,

View File

@ -511,22 +511,22 @@ multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
multiclass SteamOutputExportPattern<Instruction ExportInst,
bits<8> buf0inst, bits<8> buf1inst, bits<8> buf2inst, bits<8> buf3inst> {
// Stream0
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 0), (i32 imm:$mask)),
(ExportInst R600_Reg128:$src, 0, imm:$arraybase,
4095, imm:$mask, buf0inst, 0)>;
// Stream1
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 1), (i32 imm:$mask)),
(ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf1inst, 0)>;
// Stream2
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 2), (i32 imm:$mask)),
(ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf2inst, 0)>;
// Stream3
def : Pat<(int_R600_store_stream_output (v4f32 R600_Reg128:$src),
def : Pat<(int_r600_store_stream_output (v4f32 R600_Reg128:$src),
(i32 imm:$arraybase), (i32 3), (i32 imm:$mask)),
(ExportInst $src, 0, imm:$arraybase,
4095, imm:$mask, buf3inst, 0)>;

View File

@ -11,55 +11,57 @@
//
//===----------------------------------------------------------------------===//
// FIXME: Should migrate to using TargetPrefix that matches triple arch name.
let TargetPrefix = "R600", isTarget = 1 in {
def int_R600_store_swizzle :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_R600_store_stream_output :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
} // End TargetPrefix = "R600", isTarget = 1
class TextureIntrinsicFloatInput : Intrinsic<[llvm_v4f32_ty], [
llvm_v4f32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty], // coord_type_w
[IntrNoMem]
>;
class TextureIntrinsicInt32Input : Intrinsic<[llvm_v4i32_ty], [
llvm_v4i32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty], // coord_type_w
[IntrNoMem]
>;
let TargetPrefix = "r600", isTarget = 1 in {
class TextureIntrinsicFloatInput :
Intrinsic<[llvm_v4f32_ty], [
llvm_v4f32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty // coord_type_w
], [IntrNoMem]>;
class TextureIntrinsicInt32Input :
Intrinsic<[llvm_v4i32_ty], [
llvm_v4i32_ty, // Coord
llvm_i32_ty, // offset_x
llvm_i32_ty, // offset_y,
llvm_i32_ty, // offset_z,
llvm_i32_ty, // resource_id
llvm_i32_ty, // samplerid
llvm_i32_ty, // coord_type_x
llvm_i32_ty, // coord_type_y
llvm_i32_ty, // coord_type_z
llvm_i32_ty // coord_type_w
], [IntrNoMem]>;
def int_r600_store_swizzle :
Intrinsic<[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], []
>;
def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;
def int_r600_store_stream_output : Intrinsic<
[], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []
>;
def int_r600_tex : TextureIntrinsicFloatInput;
def int_r600_texc : TextureIntrinsicFloatInput;
def int_r600_txl : TextureIntrinsicFloatInput;
def int_r600_txlc : TextureIntrinsicFloatInput;
def int_r600_txb : TextureIntrinsicFloatInput;
def int_r600_txbc : TextureIntrinsicFloatInput;
def int_r600_txf : TextureIntrinsicInt32Input;
def int_r600_txq : TextureIntrinsicInt32Input;
def int_r600_ddx : TextureIntrinsicFloatInput;
def int_r600_ddy : TextureIntrinsicFloatInput;
def int_r600_dot4 : Intrinsic<[llvm_float_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
>;
def int_r600_dot4 : Intrinsic<[llvm_float_ty],
[llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]
>;
} // End TargetPrefix = "r600", isTarget = 1

View File

@ -1284,7 +1284,7 @@ ENDIF178: ; preds = %IF179, %ENDIF175
%tmp954 = insertelement <4 x float> %tmp953, float %result.i8, i32 1
%tmp955 = insertelement <4 x float> %tmp954, float %result.i4, i32 2
%tmp956 = insertelement <4 x float> %tmp955, float %tmp931, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %tmp956, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %tmp956, i32 0, i32 0)
ret void
}
@ -1303,7 +1303,7 @@ declare float @llvm.exp2.f32(float) #0
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -9,9 +9,9 @@ entry:
%2 = fptoui float %1 to i32
%3 = bitcast i32 %2 to float
%4 = insertelement <4 x float> undef, float %3, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %4, i32 0, i32 0)
ret void
}
declare float @fabs(float ) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -44,11 +44,11 @@ entry:
%bc = fadd <4 x float> %b, %c
%de = fadd <4 x float> %d, %e
%bcde = fadd <4 x float> %bc, %de
call void @llvm.R600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -85,11 +85,11 @@ entry:
%bcde = fadd <4 x float> %bc, %de
%fghi = fadd <4 x float> %fg, %hi
%bcdefghi = fadd <4 x float> %bcde, %fghi
call void @llvm.R600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %bcdefghi, i32 0, i32 1)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -5,10 +5,10 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = call float @floor(float %r0)
%vec = insertelement <4 x float> undef, float %r1, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @floor(float) readonly
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -9,9 +9,9 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r3 = fmul float %r0, %r1
%r4 = fadd float %r3, %r2
%vec = insertelement <4 x float> undef, float %r4, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @fabs(float ) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -8,8 +8,8 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r2 = fcmp oge float %r0, %r1
%r3 = select i1 %r2, float %r0, float %r1
%vec = insertelement <4 x float> undef, float %r3, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -8,8 +8,8 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r2 = fcmp uge float %r0, %r1
%r3 = select i1 %r2, float %r1, float %r0
%vec = insertelement <4 x float> undef, float %r3, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -13,7 +13,7 @@ entry:
ret void
}
declare float @llvm.R600.load.input(i32) readnone
declare float @llvm.r600.load.input(i32) readnone
declare void @llvm.AMDGPU.store.output(float, i32)

View File

@ -23,7 +23,7 @@ define void @s_fsub_f32(float addrspace(1)* %out, float %a, float %b) {
ret void
}
declare float @llvm.R600.load.input(i32) readnone
declare float @llvm.r600.load.input(i32) readnone
declare void @llvm.AMDGPU.store.output(float, i32)

View File

@ -15,10 +15,10 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r2 = fsub float -0.000000e+00, %r1
%r3 = call float @llvm.exp2.f32(float %r2)
%vec = insertelement <4 x float> undef, float %r3, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @llvm.exp2.f32(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -36,7 +36,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
%17 = insertelement <4 x float> %16, float %temp1.0, i32 1
%18 = insertelement <4 x float> %17, float %temp2.0, i32 2
%19 = insertelement <4 x float> %18, float %temp3.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %19, i32 0, i32 0)
ret void
IF13: ; preds = %ELSE
@ -47,4 +47,4 @@ IF13: ; preds = %ELSE
br label %ENDIF
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -44,7 +44,7 @@ main_body:
%37 = insertelement <4 x float> %36, float %33, i32 1
%38 = insertelement <4 x float> %37, float %34, i32 2
%39 = insertelement <4 x float> %38, float %35, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %39, i32 0, i32 0)
ret void
}
@ -92,9 +92,9 @@ main_body:
%37 = insertelement <4 x float> %36, float %33, i32 1
%38 = insertelement <4 x float> %37, float %34, i32 2
%39 = insertelement <4 x float> %38, float %35, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %39, i32 0, i32 0)
ret void
}
declare float @llvm.AMDGPU.clamp.f32(float, float, float) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -39,7 +39,7 @@ main_body:
%tmp29 = insertelement <4 x float> %tmp28, float %tmp25, i32 3
%tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 16, i32 0, i32 1, i32 1, i32 1, i32 1)
call void @llvm.R600.store.swizzle(<4 x float> %tmp31, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %tmp31, i32 0, i32 0)
ret void
}
@ -49,7 +49,7 @@ declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -10,7 +10,7 @@ define amdgpu_ps void @test1(<4 x float> inreg %reg0) {
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = call float @llvm.pow.f32( float %r0, float %r1)
%vec = insertelement <4 x float> undef, float %r2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
@ -29,10 +29,10 @@ define amdgpu_ps void @test1(<4 x float> inreg %reg0) {
;CHECK-NEXT: EXP_IEEE * T{{[0-9]+\.[XYZW], PV\.[XYZW]}},
define amdgpu_ps void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
%vec = call <4 x float> @llvm.pow.v4f32( <4 x float> %reg0, <4 x float> %reg1)
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare float @llvm.pow.f32(float ,float ) readonly
declare <4 x float> @llvm.pow.v4f32(<4 x float> ,<4 x float> ) readonly
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -90,7 +90,7 @@ main_body:
%85 = insertelement <4 x float> %84, float 0.000000e+00, i32 3
%86 = call float @llvm.r600.dot4(<4 x float> %81, <4 x float> %85)
%87 = insertelement <4 x float> undef, float %86, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %87, i32 2, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %87, i32 2, i32 2)
ret void
}
@ -109,7 +109,7 @@ declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #3
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }
attributes #2 = { readonly }

View File

@ -25,7 +25,7 @@ main_body:
%17 = call float @llvm.r600.dot4(<4 x float> %15,<4 x float> %16)
%18 = insertelement <4 x float> undef, float %17, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 2)
ret void
}
@ -54,13 +54,13 @@ main_body:
%17 = call float @llvm.r600.dot4(<4 x float> %15,<4 x float> %16)
%18 = insertelement <4 x float> undef, float %17, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 2)
ret void
}
; Function Attrs: readnone
declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }

View File

@ -17,10 +17,10 @@ IF: ; preds = %main_body
ENDIF: ; preds = %IF, %main_body
%5 = phi float [%4, %IF], [0.000000e+00, %main_body]
%6 = insertelement <4 x float> undef, float %5, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %6, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %6, i32 0, i32 0)
ret void
}
declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }

View File

@ -31,7 +31,7 @@ main_body:
%23 = insertelement <4 x float> %22, float %19, i32 3
%24 = call float @llvm.r600.dot4(<4 x float> %23, <4 x float> %10)
%25 = insertelement <4 x float> undef, float %24, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %25, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %25, i32 0, i32 2)
ret void
}
@ -39,6 +39,6 @@ main_body:
declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }

View File

@ -209,12 +209,12 @@ main_body:
%201 = insertelement <4 x float> %200, float %79, i32 1
%202 = insertelement <4 x float> %201, float %83, i32 2
%203 = insertelement <4 x float> %202, float %87, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %203, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %203, i32 60, i32 1)
%204 = insertelement <4 x float> undef, float %197, i32 0
%205 = insertelement <4 x float> %204, float %198, i32 1
%206 = insertelement <4 x float> %205, float %199, i32 2
%207 = insertelement <4 x float> %206, float %117, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %207, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %207, i32 0, i32 2)
ret void
}
@ -233,7 +233,7 @@ declare float @llvm.AMDGPU.clamp.f32(float, float, float) #1
; Function Attrs: nounwind readonly
declare float @llvm.pow.f32(float, float) #2
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32) #3
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #3
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind readonly }

View File

@ -16,8 +16,8 @@ entry:
%r1 = extractelement <4 x float> %reg0, i32 1
%r2 = fmul float %r0, %r1
%vec = insertelement <4 x float> undef, float %r2, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -98,43 +98,43 @@ main_body:
%83 = insertelement <4 x float> %82, float %55, i32 1
%84 = insertelement <4 x float> %83, float %59, i32 2
%85 = insertelement <4 x float> %84, float %63, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %85, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %85, i32 60, i32 1)
%86 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%87 = insertelement <4 x float> %86, float 0.000000e+00, i32 1
%88 = insertelement <4 x float> %87, float 0.000000e+00, i32 2
%89 = insertelement <4 x float> %88, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %89, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %89, i32 0, i32 2)
%90 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%91 = insertelement <4 x float> %90, float 0.000000e+00, i32 1
%92 = insertelement <4 x float> %91, float 0.000000e+00, i32 2
%93 = insertelement <4 x float> %92, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %93, i32 1, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %93, i32 1, i32 2)
%94 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%95 = insertelement <4 x float> %94, float %65, i32 1
%96 = insertelement <4 x float> %95, float %67, i32 2
%97 = insertelement <4 x float> %96, float %69, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %97, i32 2, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %97, i32 2, i32 2)
%98 = insertelement <4 x float> undef, float %77, i32 0
%99 = insertelement <4 x float> %98, float %79, i32 1
%100 = insertelement <4 x float> %99, float %81, i32 2
%101 = insertelement <4 x float> %100, float %71, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %101, i32 3, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %101, i32 3, i32 2)
%102 = insertelement <4 x float> undef, float %73, i32 0
%103 = insertelement <4 x float> %102, float %75, i32 1
%104 = insertelement <4 x float> %103, float 0.000000e+00, i32 2
%105 = insertelement <4 x float> %104, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %105, i32 4, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %105, i32 4, i32 2)
%106 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%107 = insertelement <4 x float> %106, float 0.000000e+00, i32 1
%108 = insertelement <4 x float> %107, float 0.000000e+00, i32 2
%109 = insertelement <4 x float> %108, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %109, i32 5, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %109, i32 5, i32 2)
%110 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%111 = insertelement <4 x float> %110, float 0.000000e+00, i32 1
%112 = insertelement <4 x float> %111, float 0.000000e+00, i32 2
%113 = insertelement <4 x float> %112, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %113, i32 6, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %113, i32 6, i32 2)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -40,7 +40,7 @@ main_body:
%tmp36 = insertelement <4 x float> %tmp35, float %tmp34, i32 1
%tmp37 = insertelement <4 x float> %tmp36, float %tmp34, i32 2
%tmp38 = insertelement <4 x float> %tmp37, float 1.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %tmp38, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %tmp38, i32 0, i32 0)
ret void
}
@ -50,7 +50,7 @@ declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #0
; Function Attrs: readnone
declare float @fabs(float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: readnone
declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -32,27 +32,27 @@ IF41: ; preds = %LOOP
%17 = insertelement <4 x float> %16, float %temp8.0, i32 1
%18 = insertelement <4 x float> %17, float %temp12.0, i32 2
%19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
call void @llvm.r600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
%20 = insertelement <4 x float> undef, float %0, i32 0
%21 = insertelement <4 x float> %20, float %temp8.0, i32 1
%22 = insertelement <4 x float> %21, float %temp12.0, i32 2
%23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
call void @llvm.r600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
%24 = insertelement <4 x float> undef, float %0, i32 0
%25 = insertelement <4 x float> %24, float %temp8.0, i32 1
%26 = insertelement <4 x float> %25, float %temp12.0, i32 2
%27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
call void @llvm.r600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
%28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
%29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
%30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
%31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %31, i32 60, i32 1)
%32 = insertelement <4 x float> undef, float %0, i32 0
%33 = insertelement <4 x float> %32, float %temp8.0, i32 1
%34 = insertelement <4 x float> %33, float %temp12.0, i32 2
%35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %35, i32 0, i32 2)
ret void
ENDIF40: ; preds = %LOOP
@ -112,6 +112,6 @@ ENDIF48: ; preds = %LOOP47
br label %LOOP47
}
declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
declare void @llvm.r600.store.stream.output(<4 x float>, i32, i32, i32)
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -6,8 +6,8 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = fdiv float 1.0, %r0
%vec = insertelement <4 x float> undef, float %r1, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -40,11 +40,11 @@ bb:
%tmp34 = fadd <4 x float> %tmp33, %tmp23
%tmp35 = fadd <4 x float> %tmp34, %tmp25
%tmp36 = fadd <4 x float> %tmp35, %tmp27
call void @llvm.R600.store.swizzle(<4 x float> %tmp36, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %tmp36, i32 0, i32 2)
ret void
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0

View File

@ -52,7 +52,7 @@ ENDIF: ; preds = %ELSE17, %ELSE, %IF
%32 = insertelement <4 x float> %31, float %28, i32 1
%33 = insertelement <4 x float> %32, float %29, i32 2
%34 = insertelement <4 x float> %33, float %30, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %34, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %34, i32 0, i32 0)
ret void
ELSE17: ; preds = %ELSE
@ -76,6 +76,6 @@ ELSE17: ; preds = %ELSE
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -51,7 +51,7 @@ IF: ; preds = %LOOP
%35 = insertelement <4 x float> %34, float %31, i32 1
%36 = insertelement <4 x float> %35, float %32, i32 2
%37 = insertelement <4 x float> %36, float %33, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %37, i32 0, i32 0)
ret void
LOOP29: ; preds = %LOOP, %ENDIF30
@ -83,6 +83,6 @@ ENDIF30: ; preds = %LOOP29
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -38,7 +38,7 @@ IF: ; preds = %LOOP
%22 = insertelement <4 x float> %21, float %18, i32 1
%23 = insertelement <4 x float> %22, float %19, i32 2
%24 = insertelement <4 x float> %23, float %20, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %24, i32 0, i32 0)
ret void
ENDIF: ; preds = %LOOP
@ -50,6 +50,6 @@ ENDIF: ; preds = %LOOP
declare float @llvm.AMDGPU.clamp.f32(float, float, float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readnone }

View File

@ -66,7 +66,7 @@ ENDIF: ; preds = %IF23, %ELSE, %IF
%45 = insertelement <4 x float> %44, float %temp5.0, i32 1
%46 = insertelement <4 x float> %45, float %temp6.0, i32 2
%47 = insertelement <4 x float> %46, float %temp7.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %47, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %47, i32 0, i32 0)
ret void
IF23: ; preds = %ELSE
@ -89,6 +89,6 @@ IF23: ; preds = %ELSE
declare float @fabs(float) #0
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #0 = { readonly }

View File

@ -32,7 +32,7 @@ ENDIF: ; preds = %IF13, %ELSE, %main_
%17 = insertelement <4 x float> %16, float %temp1.0, i32 1
%18 = insertelement <4 x float> %17, float 0.000000e+00, i32 2
%19 = insertelement <4 x float> %18, float %temp3.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %19, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %19, i32 0, i32 0)
ret void
IF13: ; preds = %ELSE
@ -43,4 +43,4 @@ IF13: ; preds = %ELSE
br label %ENDIF
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -85,12 +85,12 @@ ENDIF: ; preds = %ENDIF16, %LOOP, %ma
%72 = insertelement <4 x float> %71, float %62, i32 1
%73 = insertelement <4 x float> %72, float %66, i32 2
%74 = insertelement <4 x float> %73, float %70, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %74, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %74, i32 60, i32 1)
%75 = insertelement <4 x float> undef, float %temp.0, i32 0
%76 = insertelement <4 x float> %75, float %temp1.0, i32 1
%77 = insertelement <4 x float> %76, float %temp2.0, i32 2
%78 = insertelement <4 x float> %77, float %temp3.0, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %78, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %78, i32 0, i32 2)
ret void
LOOP: ; preds = %main_body, %ENDIF19
@ -127,4 +127,4 @@ ENDIF19: ; preds = %ENDIF16
br label %LOOP
}
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -19,13 +19,13 @@ define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4
%v2 = insertelement <4 x float> %v1, float %r2, i32 2
%res = call float @llvm.r600.dot4(<4 x float> %v2, <4 x float> %v2)
%vecres = insertelement <4 x float> undef, float %res, i32 0
call void @llvm.R600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %vecres, i32 0, i32 2)
ret void
}
; Function Attrs: readnone
declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }

View File

@ -68,27 +68,27 @@ main_body:
%57 = insertelement <4 x float> %56, float %1, i32 1
%58 = insertelement <4 x float> %57, float %2, i32 2
%59 = insertelement <4 x float> %58, float %3, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %59, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %59, i32 60, i32 1)
%60 = insertelement <4 x float> undef, float %10, i32 0
%61 = insertelement <4 x float> %60, float %13, i32 1
%62 = insertelement <4 x float> %61, float %16, i32 2
%63 = insertelement <4 x float> %62, float %19, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %63, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %63, i32 0, i32 2)
%64 = insertelement <4 x float> undef, float %22, i32 0
%65 = insertelement <4 x float> %64, float %25, i32 1
%66 = insertelement <4 x float> %65, float %28, i32 2
%67 = insertelement <4 x float> %66, float %31, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %67, i32 1, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %67, i32 1, i32 2)
%68 = insertelement <4 x float> undef, float %34, i32 0
%69 = insertelement <4 x float> %68, float %37, i32 1
%70 = insertelement <4 x float> %69, float %40, i32 2
%71 = insertelement <4 x float> %70, float %43, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %71, i32 2, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %71, i32 2, i32 2)
%72 = insertelement <4 x float> undef, float %46, i32 0
%73 = insertelement <4 x float> %72, float %49, i32 1
%74 = insertelement <4 x float> %73, float %52, i32 2
%75 = insertelement <4 x float> %74, float %55, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %75, i32 3, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %75, i32 3, i32 2)
ret void
}
@ -111,18 +111,18 @@ main_body:
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %2, i32 0
%12 = insertelement <4 x float> %11, float %3, i32 1
call void @llvm.R600.store.swizzle(<4 x float> %12, i32 60, i32 1)
call void @llvm.r600.store.swizzle(<4 x float> %12, i32 60, i32 1)
%13 = insertelement <4 x float> undef, float %6, i32 0
%14 = insertelement <4 x float> %13, float %8, i32 1
%15 = insertelement <4 x float> %14, float %10, i32 2
%16 = insertelement <4 x float> %15, float 0.000000e+00, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %16, i32 0, i32 2)
call void @llvm.r600.store.swizzle(<4 x float> %16, i32 0, i32 2)
ret void
}
; Function Attrs: nounwind readonly
declare float @llvm.cos.f32(float) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { nounwind readonly }

View File

@ -15,9 +15,9 @@ define amdgpu_vs void @test(<4 x float> inreg %reg0) {
%9 = call <4 x float> @llvm.r600.tex(<4 x float> %8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%10 = call <4 x float> @llvm.r600.tex(<4 x float> %8, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%11 = fadd <4 x float> %9, %10
call void @llvm.R600.store.swizzle(<4 x float> %11, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %11, i32 0, i32 0)
ret void
}
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)

View File

@ -21,9 +21,9 @@ define amdgpu_vs void @test(<4 x float> inreg %reg0) {
%16 = call <4 x float> @llvm.r600.tex(<4 x float> %13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%17 = fadd <4 x float> %14, %15
%18 = fadd <4 x float> %17, %16
call void @llvm.R600.store.swizzle(<4 x float> %18, i32 0, i32 0)
call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 0)
ret void
}
declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)