mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-29 14:40:25 +00:00
R600: Properly set the COUNT_3 bit in the TEX clause-initiating instruction for pre-Evergreen (pre-EG) generations.
Fixes the rv7x0 bug in Heaven reported here: https://bugs.freedesktop.org/show_bug.cgi?id=64257

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184116 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4b548ecb01
commit
98f5cf8000
@ -575,14 +575,16 @@ class CF_WORD0_R600 {
|
||||
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
|
||||
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
|
||||
field bits<64> Inst;
|
||||
bits<4> CNT;
|
||||
|
||||
let CF_INST = inst;
|
||||
let BARRIER = 1;
|
||||
let CF_CONST = 0;
|
||||
let VALID_PIXEL_MODE = 0;
|
||||
let COND = 0;
|
||||
let COUNT = CNT{2-0};
|
||||
let CALL_COUNT = 0;
|
||||
let COUNT_3 = 0;
|
||||
let COUNT_3 = CNT{3};
|
||||
let END_OF_PROGRAM = 0;
|
||||
let WHOLE_QUAD_MODE = 0;
|
||||
|
||||
@ -1162,52 +1164,52 @@ let Predicates = [isR600] in {
|
||||
}
|
||||
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
|
||||
|
||||
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
|
||||
"TEX $COUNT @$ADDR"> {
|
||||
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
|
||||
"TEX $CNT @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
}
|
||||
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
|
||||
"VTX $COUNT @$ADDR"> {
|
||||
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
|
||||
"VTX $CNT @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
}
|
||||
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
|
||||
"LOOP_START_DX10 @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
|
||||
"LOOP_BREAK @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
|
||||
"CONTINUE @$ADDR"> {
|
||||
let POP_COUNT = 0;
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
|
||||
"JUMP @$ADDR POP:$POP_COUNT"> {
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
|
||||
"ELSE @$ADDR POP:$POP_COUNT"> {
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
|
||||
let ADDR = 0;
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
let POP_COUNT = 0;
|
||||
}
|
||||
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
|
||||
"POP @$ADDR POP:$POP_COUNT"> {
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
}
|
||||
def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
|
||||
let COUNT = 0;
|
||||
let CNT = 0;
|
||||
let POP_COUNT = 0;
|
||||
let ADDR = 0;
|
||||
let END_OF_PROGRAM = 1;
|
||||
|
44
test/CodeGen/R600/rv7x0_count3.ll
Normal file
44
test/CodeGen/R600/rv7x0_count3.ll
Normal file
@ -0,0 +1,44 @@
|
||||
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
|
||||
|
||||
; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
|
||||
|
||||
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
|
||||
%1 = call float @llvm.R600.load.input(i32 4)
|
||||
%2 = call float @llvm.R600.load.input(i32 5)
|
||||
%3 = call float @llvm.R600.load.input(i32 6)
|
||||
%4 = call float @llvm.R600.load.input(i32 7)
|
||||
%5 = insertelement <4 x float> undef, float %1, i32 0
|
||||
%6 = insertelement <4 x float> %5, float %2, i32 1
|
||||
%7 = insertelement <4 x float> %6, float %3, i32 2
|
||||
%8 = insertelement <4 x float> %7, float %4, i32 3
|
||||
%9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
|
||||
%10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 1, i32 0, i32 1)
|
||||
%11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 2, i32 0, i32 1)
|
||||
%12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 3, i32 0, i32 1)
|
||||
%13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 4, i32 0, i32 1)
|
||||
%14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 5, i32 0, i32 1)
|
||||
%15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 6, i32 0, i32 1)
|
||||
%16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 7, i32 0, i32 1)
|
||||
%17 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 8, i32 0, i32 1)
|
||||
%18 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 9, i32 0, i32 1)
|
||||
%19 = fadd <4 x float> %9, %10
|
||||
%20 = fadd <4 x float> %19, %11
|
||||
%21 = fadd <4 x float> %20, %12
|
||||
%22 = fadd <4 x float> %21, %13
|
||||
%23 = fadd <4 x float> %22, %14
|
||||
%24 = fadd <4 x float> %23, %15
|
||||
%25 = fadd <4 x float> %24, %16
|
||||
%26 = fadd <4 x float> %25, %17
|
||||
%27 = fadd <4 x float> %26, %18
|
||||
call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 2)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
|
||||
|
||||
; Function Attrs: readnone
|
||||
declare float @llvm.R600.load.input(i32) #1
|
||||
|
||||
|
||||
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
|
||||
attributes #1 = { readnone }
|
Loading…
Reference in New Issue
Block a user