R600: Properly set COUNT_3 bit in TEX clause initiating inst for pre EG gen.

Fixes rv7x0 bug in Heaven reported here:
https://bugs.freedesktop.org/show_bug.cgi?id=64257

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@184116 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vincent Lejeune 2013-06-17 20:16:26 +00:00
parent 4b548ecb01
commit 98f5cf8000
2 changed files with 60 additions and 14 deletions

View File

@ -575,14 +575,16 @@ class CF_WORD0_R600 {
class CF_CLAUSE_R600 <bits<7> inst, dag ins, string AsmPrint> : AMDGPUInst <(outs),
ins, AsmPrint, [] >, CF_WORD0_R600, CF_WORD1_R600 {
field bits<64> Inst;
bits<4> CNT;
let CF_INST = inst;
let BARRIER = 1;
let CF_CONST = 0;
let VALID_PIXEL_MODE = 0;
let COND = 0;
let COUNT = CNT{2-0};
let CALL_COUNT = 0;
let COUNT_3 = 0;
let COUNT_3 = CNT{3};
let END_OF_PROGRAM = 0;
let WHOLE_QUAD_MODE = 0;
@ -1162,52 +1164,52 @@ let Predicates = [isR600] in {
}
defm : SteamOutputExportPattern<R600_ExportBuf, 0x20, 0x21, 0x22, 0x23>;
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$COUNT),
"TEX $COUNT @$ADDR"> {
def CF_TC_R600 : CF_CLAUSE_R600<1, (ins i32imm:$ADDR, i32imm:$CNT),
"TEX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$COUNT),
"VTX $COUNT @$ADDR"> {
def CF_VC_R600 : CF_CLAUSE_R600<2, (ins i32imm:$ADDR, i32imm:$CNT),
"VTX $CNT @$ADDR"> {
let POP_COUNT = 0;
}
def WHILE_LOOP_R600 : CF_CLAUSE_R600<6, (ins i32imm:$ADDR),
"LOOP_START_DX10 @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
let CNT = 0;
}
def END_LOOP_R600 : CF_CLAUSE_R600<5, (ins i32imm:$ADDR), "END_LOOP @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
let CNT = 0;
}
def LOOP_BREAK_R600 : CF_CLAUSE_R600<9, (ins i32imm:$ADDR),
"LOOP_BREAK @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
let CNT = 0;
}
def CF_CONTINUE_R600 : CF_CLAUSE_R600<8, (ins i32imm:$ADDR),
"CONTINUE @$ADDR"> {
let POP_COUNT = 0;
let COUNT = 0;
let CNT = 0;
}
def CF_JUMP_R600 : CF_CLAUSE_R600<10, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"JUMP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
let CNT = 0;
}
def CF_ELSE_R600 : CF_CLAUSE_R600<13, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"ELSE @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
let CNT = 0;
}
def CF_CALL_FS_R600 : CF_CLAUSE_R600<19, (ins), "CALL_FS"> {
let ADDR = 0;
let COUNT = 0;
let CNT = 0;
let POP_COUNT = 0;
}
def POP_R600 : CF_CLAUSE_R600<14, (ins i32imm:$ADDR, i32imm:$POP_COUNT),
"POP @$ADDR POP:$POP_COUNT"> {
let COUNT = 0;
let CNT = 0;
}
def CF_END_R600 : CF_CLAUSE_R600<0, (ins), "CF_END"> {
let COUNT = 0;
let CNT = 0;
let POP_COUNT = 0;
let ADDR = 0;
let END_OF_PROGRAM = 1;

View File

@ -0,0 +1,44 @@
; RUN: llc < %s -march=r600 -show-mc-encoding -mcpu=rv710 | FileCheck %s
; CHECK: TEX 9 @4 ; encoding: [0x04,0x00,0x00,0x00,0x00,0x04,0x88,0x80]
define void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
%1 = call float @llvm.R600.load.input(i32 4)
%2 = call float @llvm.R600.load.input(i32 5)
%3 = call float @llvm.R600.load.input(i32 6)
%4 = call float @llvm.R600.load.input(i32 7)
%5 = insertelement <4 x float> undef, float %1, i32 0
%6 = insertelement <4 x float> %5, float %2, i32 1
%7 = insertelement <4 x float> %6, float %3, i32 2
%8 = insertelement <4 x float> %7, float %4, i32 3
%9 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)
%10 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 1, i32 0, i32 1)
%11 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 2, i32 0, i32 1)
%12 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 3, i32 0, i32 1)
%13 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 4, i32 0, i32 1)
%14 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 5, i32 0, i32 1)
%15 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 6, i32 0, i32 1)
%16 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 7, i32 0, i32 1)
%17 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 8, i32 0, i32 1)
%18 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 9, i32 0, i32 1)
%19 = fadd <4 x float> %9, %10
%20 = fadd <4 x float> %19, %11
%21 = fadd <4 x float> %20, %12
%22 = fadd <4 x float> %21, %13
%23 = fadd <4 x float> %22, %14
%24 = fadd <4 x float> %23, %15
%25 = fadd <4 x float> %24, %16
%26 = fadd <4 x float> %25, %17
%27 = fadd <4 x float> %26, %18
call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 2)
ret void
}
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
; Function Attrs: readnone
declare float @llvm.R600.load.input(i32) #1
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
attributes #1 = { readnone }