R600: initial copy of r300 code

This commit is contained in:
Alex Deucher 2009-04-08 15:16:35 -04:00
parent 90ffce4973
commit 4138bdb3b1
44 changed files with 18523 additions and 26 deletions

View File

@ -724,7 +724,7 @@ if test "$mesa_driver" = dri; then
# because there is no x86-64 system where they could *ever*
# be used.
if test "x$DRI_DIRS" = "xyes"; then
DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 radeon \
DRI_DIRS="i915 i965 mach64 mga r128 r200 r300 r600 radeon \
savage tdfx unichrome swrast"
fi
;;
@ -732,13 +732,13 @@ if test "$mesa_driver" = dri; then
# Build only the drivers for cards that exist on PowerPC.
# At some point MGA will be added, but not yet.
if test "x$DRI_DIRS" = "xyes"; then
DRI_DIRS="mach64 r128 r200 r300 radeon tdfx swrast"
DRI_DIRS="mach64 r128 r200 r300 r600 radeon tdfx swrast"
fi
;;
sparc*)
# Build only the drivers for cards that exist on sparc`
if test "x$DRI_DIRS" = "xyes"; then
DRI_DIRS="mach64 r128 r200 r300 radeon ffb swrast"
DRI_DIRS="mach64 r128 r200 r300 r600 radeon ffb swrast"
fi
;;
esac
@ -757,7 +757,7 @@ if test "$mesa_driver" = dri; then
# ffb and gamma are missing because they have not been converted
# to use the new interface.
if test "x$DRI_DIRS" = "xyes"; then
DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon tdfx \
DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon tdfx \
unichrome savage sis swrast"
fi
;;
@ -772,7 +772,7 @@ if test "$mesa_driver" = dri; then
# default drivers
if test "x$DRI_DIRS" = "xyes"; then
DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 radeon s3v \
DRI_DIRS="i810 i915 i965 mach64 mga r128 r200 r300 r600 radeon s3v \
savage sis tdfx trident unichrome ffb swrast"
fi

View File

@ -0,0 +1,2 @@
#!/bin/sh
indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"

View File

@ -0,0 +1,118 @@
# src/mesa/drivers/dri/r300/Makefile
TOP = ../../../../..
include $(TOP)/configs/current
CFLAGS += $(RADEON_CFLAGS)
LIBNAME = r600_dri.so
MINIGLX_SOURCES = server/radeon_dri.c
ifeq ($(USING_EGL), 1)
EGL_SOURCES = server/radeon_egl.c
endif
COMMON_SOURCES = \
../../common/driverfuncs.c \
../common/mm.c \
../common/utils.c \
../common/texmem.c \
../common/vblank.c \
../common/xmlconfig.c \
../common/dri_util.c
RADEON_COMMON_SOURCES = \
radeon_texture.c \
radeon_common_context.c \
radeon_common.c \
radeon_dma.c \
radeon_lock.c \
radeon_bo_legacy.c \
radeon_cs_legacy.c \
radeon_mipmap_tree.c \
radeon_span.c \
radeon_fbo.c
DRIVER_SOURCES = \
radeon_screen.c \
r600_context.c \
r600_ioctl.c \
r600_cmdbuf.c \
r600_state.c \
r600_render.c \
r600_tex.c \
r600_texstate.c \
radeon_program.c \
radeon_program_alu.c \
radeon_program_pair.c \
radeon_nqssadce.c \
r600_vertprog.c \
r600_fragprog.c \
r600_fragprog_swizzle.c \
r600_fragprog_emit.c \
r700_fragprog.c \
r700_fragprog_emit.c \
r600_shader.c \
r600_emit.c \
r600_swtcl.c \
$(RADEON_COMMON_SOURCES) \
$(EGL_SOURCES)
C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
DRIVER_DEFINES = -DCOMPILE_R600 -DR200_MERGED=0 \
-DRADEON_COMMON=1 -DRADEON_COMMON_FOR_R600 \
# -DRADEON_BO_TRACK \
-Wall
SYMLINKS = \
server/radeon_dri.c \
server/radeon_dri.h \
server/radeon.h \
server/radeon_macros.h \
server/radeon_reg.h \
server/radeon_egl.c
COMMON_SYMLINKS = \
radeon_chipset.h \
radeon_screen.c \
radeon_screen.h \
radeon_span.h \
radeon_span.c \
radeon_bo_legacy.c \
radeon_cs_legacy.c \
radeon_bo_legacy.h \
radeon_cs_legacy.h \
radeon_bocs_wrapper.h \
radeon_lock.c \
radeon_lock.h \
radeon_common.c \
radeon_common.h \
radeon_common_context.c \
radeon_common_context.h \
radeon_cmdbuf.h \
radeon_dma.c \
radeon_dma.h \
radeon_mipmap_tree.c \
radeon_mipmap_tree.h \
radeon_texture.c \
radeon_texture.h \
radeon_fbo.c
DRI_LIB_DEPS += $(RADEON_LDFLAGS)
##### TARGETS #####
include ../Makefile.template
server:
mkdir -p server
$(SYMLINKS): server
@[ -e $@ ] || ln -sf ../../radeon/$@ server/
$(COMMON_SYMLINKS):
@[ -e $@ ] || ln -sf ../radeon/$@ ./
symlinks: $(SYMLINKS) $(COMMON_SYMLINKS)

View File

@ -0,0 +1,669 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Nicolai Haehnle <prefect_@gmx.net>
*/
#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "swrast/swrast.h"
#include "drm.h"
#include "radeon_drm.h"
#include "r600_context.h"
#include "r600_ioctl.h"
#include "radeon_reg.h"
#include "r600_reg.h"
#include "r600_cmdbuf.h"
#include "r600_emit.h"
#include "radeon_bocs_wrapper.h"
#include "radeon_mipmap_tree.h"
#include "r600_state.h"
#include "radeon_reg.h"
#define R300_VAP_PVS_UPLOAD_ADDRESS 0x2200
# define RADEON_ONE_REG_WR (1 << 15)
/** # of dwords reserved for additional instructions that may need to be written
* during flushing.
*/
#define SPACE_FOR_FLUSHING 4
static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
{
if (r300->radeon.radeonScreen->kernel_mm) {
return ((((*pkt) >> 16) & 0x3FFF) + 1);
} else {
drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
return t->packet0.count;
}
return 0;
}
#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
drm_r300_cmd_header_t cmd;
uint32_t addr, ndw, i;
if (!r300->radeon.radeonScreen->kernel_mm) {
uint32_t dwords;
dwords = (*atom->check) (ctx, atom);
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, dwords);
END_BATCH();
return;
}
cmd.u = atom->cmd[0];
addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
ndw = cmd.vpu.count * 4;
if (ndw) {
if (r300->vap_flush_needed) {
BEGIN_BATCH_NO_AUTOSTATE(15 + ndw);
/* flush processing vertices */
OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0);
OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
OUT_BATCH_REGVAL(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
OUT_BATCH_REGVAL(R300_SC_SCREENDOOR, 0xffffff);
OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
r300->vap_flush_needed = GL_FALSE;
} else {
BEGIN_BATCH_NO_AUTOSTATE(5 + ndw);
}
OUT_BATCH_REGVAL(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, ndw-1) | RADEON_ONE_REG_WR);
for (i = 0; i < ndw; i++) {
OUT_BATCH(atom->cmd[i+1]);
}
OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
END_BATCH();
}
}
void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
drm_r300_cmd_header_t cmd;
uint32_t addr, ndw, i, sz;
int type, clamp, stride;
if (!r300->radeon.radeonScreen->kernel_mm) {
uint32_t dwords;
dwords = (*atom->check) (ctx, atom);
BEGIN_BATCH_NO_AUTOSTATE(dwords);
OUT_BATCH_TABLE(atom->cmd, dwords);
END_BATCH();
return;
}
cmd.u = atom->cmd[0];
sz = cmd.r500fp.count;
addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
addr |= (type << 16);
addr |= (clamp << 17);
stride = type ? 4 : 6;
ndw = sz * stride;
if (ndw) {
BEGIN_BATCH_NO_AUTOSTATE(3 + ndw);
OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
OUT_BATCH(addr);
OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, ndw-1) | RADEON_ONE_REG_WR);
for (i = 0; i < ndw; i++) {
OUT_BATCH(atom->cmd[i+1]);
}
END_BATCH();
}
}
static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
int notexture = 0;
if (numtmus) {
int i;
for(i = 0; i < numtmus; ++i) {
radeonTexObj *t = r300->hw.textures[i];
if (!t)
notexture = 1;
}
if (r300->radeon.radeonScreen->kernel_mm && notexture) {
return;
}
BEGIN_BATCH_NO_AUTOSTATE(4 * numtmus);
for(i = 0; i < numtmus; ++i) {
radeonTexObj *t = r300->hw.textures[i];
OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
if (t && !t->image_override) {
OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, 0,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else if (!t) {
OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
} else { /* override cases */
if (t->bo) {
OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
} else if (!r300->radeon.radeonScreen->kernel_mm) {
OUT_BATCH(t->override_offset);
}
else
OUT_BATCH(r300->radeon.radeonScreen->texOffset[0]);
}
}
END_BATCH();
}
}
static void emit_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
struct radeon_renderbuffer *rrb;
uint32_t cbpitch;
uint32_t offset = r300->radeon.state.color.draw_offset;
rrb = radeon_get_colorbuffer(&r300->radeon);
if (!rrb || !rrb->bo) {
fprintf(stderr, "no rrb\n");
return;
}
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->cpp == 4)
cbpitch |= R300_COLOR_FORMAT_ARGB8888;
else
cbpitch |= R300_COLOR_FORMAT_RGB565;
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
cbpitch |= R300_COLOR_TILE_ENABLE;
BEGIN_BATCH_NO_AUTOSTATE(8);
OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
OUT_BATCH_RELOC(offset, rrb->bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
OUT_BATCH_RELOC(cbpitch, rrb->bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
if (r300->radeon.radeonScreen->driScreen->dri2.enabled) {
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
BEGIN_BATCH_NO_AUTOSTATE(3);
OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
OUT_BATCH(0);
OUT_BATCH((rrb->width << R300_SCISSORS_X_SHIFT) |
(rrb->height << R300_SCISSORS_Y_SHIFT));
END_BATCH();
} else {
BEGIN_BATCH_NO_AUTOSTATE(3);
OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) |
(R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT));
OUT_BATCH(((rrb->width + R300_SCISSORS_OFFSET) << R300_SCISSORS_X_SHIFT) |
((rrb->height + R300_SCISSORS_OFFSET) << R300_SCISSORS_Y_SHIFT));
END_BATCH();
}
}
}
static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
struct radeon_renderbuffer *rrb;
uint32_t zbpitch;
rrb = radeon_get_depthbuffer(&r300->radeon);
if (!rrb)
return;
zbpitch = (rrb->pitch / rrb->cpp);
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
zbpitch |= R300_DEPTHMACROTILE_ENABLE;
}
if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
zbpitch |= R300_DEPTHMICROTILE_TILED;
}
BEGIN_BATCH_NO_AUTOSTATE(6);
OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, zbpitch);
END_BATCH();
}
static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
struct radeon_renderbuffer *rrb;
uint32_t format = 0;
rrb = radeon_get_depthbuffer(&r300->radeon);
if (!rrb)
format = 0;
else {
if (rrb->cpp == 2)
format = R300_DEPTHFORMAT_16BIT_INT_Z;
else if (rrb->cpp == 4)
format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
}
OUT_BATCH(atom->cmd[0]);
atom->cmd[1] &= ~0xf;
atom->cmd[1] |= format;
OUT_BATCH(atom->cmd[1]);
OUT_BATCH(atom->cmd[2]);
OUT_BATCH(atom->cmd[3]);
OUT_BATCH(atom->cmd[4]);
}
static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
{
return atom->cmd_size;
}
static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
int cnt;
if (atom->cmd[0] == CP_PACKET2) {
return 0;
}
cnt = packet0_count(r300, atom->cmd);
return cnt ? cnt + 1 : 0;
}
int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
cnt = vpu_count(atom->cmd);
return cnt ? (cnt * 4) + 1 : 0;
}
int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
cnt = r500fp_count(atom->cmd);
return cnt ? (cnt * 6) + 1 : 0;
}
int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
{
int cnt;
cnt = r500fp_count(atom->cmd);
return cnt ? (cnt * 4) + 1 : 0;
}
#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
do { \
r300->hw.ATOM.cmd_size = (SZ); \
r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \
r300->hw.ATOM.name = #ATOM; \
r300->hw.ATOM.idx = (IDX); \
r300->hw.ATOM.check = check_##CHK; \
r300->hw.ATOM.dirty = GL_FALSE; \
r300->radeon.hw.max_state_size += (SZ); \
insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \
} while (0)
/**
* Allocate memory for the command buffer and initialize the state atom
* list. Note that the initial hardware state is set by r300InitState().
*/
void r300InitCmdBuf(r300ContextPtr r300)
{
int mtu;
int has_tcl = 1;
int is_r500 = 0;
int i;
if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
has_tcl = 0;
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
is_r500 = 1;
r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
if (RADEON_DEBUG & DEBUG_TEXTURE) {
fprintf(stderr, "Using %d maximum texture units..\n", mtu);
}
/* Setup the atom linked list */
make_empty_list(&r300->radeon.hw.atomlist);
r300->radeon.hw.atomlist.name = "atom-list";
/* Initialize state atoms */
ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
if (is_r500) {
ALLOC_STATE(vap_index_offset, always, 2, 0);
r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
r300->hw.vap_index_offset.cmd[1] = 0;
}
ALLOC_STATE(vte, always, 3, 0);
r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
ALLOC_STATE(vap_cntl_status, always, 2, 0);
r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
if (has_tcl) {
ALLOC_STATE(vap_clip_cntl, always, 2, 0);
r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
ALLOC_STATE(vap_clip, always, 5, 0);
r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
}
ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
r300->hw.vof.cmd[R300_VOF_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
if (has_tcl) {
ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
r300->hw.pvs.cmd[R300_PVS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
}
ALLOC_STATE(gb_enable, always, 2, 0);
r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 5);
ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
ALLOC_STATE(ga_point_s0, always, 5, 0);
r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
ALLOC_STATE(ga_point_minmax, always, 4, 0);
r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
ALLOC_STATE(lcntl, always, 2, 0);
r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
ALLOC_STATE(ga_line_stipple, always, 4, 0);
r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
ALLOC_STATE(shade, always, 5, 0);
r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 4);
ALLOC_STATE(polygon_mode, always, 4, 0);
r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
ALLOC_STATE(fogp, always, 3, 0);
r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
ALLOC_STATE(zbias_cntl, always, 2, 0);
r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
ALLOC_STATE(occlusion_cntl, always, 2, 0);
r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
ALLOC_STATE(su_depth_scale, always, 3, 0);
r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
if (is_r500) {
ALLOC_STATE(ri, always, R500_RI_CMDSIZE, 0);
r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
for (i = 0; i < 8; i++) {
r300->hw.ri.cmd[R300_RI_CMD_0 + i +1] =
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
(R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
}
ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
} else {
ALLOC_STATE(ri, always, R300_RI_CMDSIZE, 0);
r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
}
ALLOC_STATE(sc_hyperz, always, 3, 0);
r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
ALLOC_STATE(sc_screendoor, always, 2, 0);
r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
ALLOC_STATE(us_out_fmt, always, 6, 0);
r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
if (is_r500) {
ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
r300->hw.r500fp.emit = emit_r500fp;
ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
r300->hw.r500fp_const.emit = emit_r500fp;
} else {
ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
}
ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
ALLOC_STATE(fg_depth_src, always, 2, 0);
r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
ALLOC_STATE(rb3d_cctl, always, 2, 0);
r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
if (is_r500) {
ALLOC_STATE(blend_color, always, 3, 0);
r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
} else {
ALLOC_STATE(blend_color, always, 2, 0);
r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
}
ALLOC_STATE(rop, always, 2, 0);
r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
ALLOC_STATE(cb, always, R300_CB_CMDSIZE, 0);
r300->hw.cb.emit = &emit_cb_offset;
ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
r300->hw.zs.cmd[R300_ZS_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
ALLOC_STATE(zstencil_format, always, 5, 0);
r300->hw.zstencil_format.cmd[0] =
cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
r300->hw.zstencil_format.emit = emit_zstencil_format;
ALLOC_STATE(zb, always, R300_ZB_CMDSIZE, 0);
r300->hw.zb.emit = emit_zb_offset;
ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
ALLOC_STATE(unk4F30, always, 3, 0);
r300->hw.unk4F30.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4F30, 2);
ALLOC_STATE(zb_hiz_offset, always, 2, 0);
r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
/* VPU only on TCL */
if (has_tcl) {
int i;
ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
r300->hw.vpi.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
r300->hw.vpi.emit = emit_vpu;
if (is_r500) {
ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
r300->hw.vpp.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
r300->hw.vpp.emit = emit_vpu;
ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
r300->hw.vps.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
r300->hw.vpucp[i].cmd[0] =
cmdvpu(r300->radeon.radeonScreen,
R500_PVS_UCP_START + i, 1);
r300->hw.vpucp[i].emit = emit_vpu;
}
} else {
ALLOC_STATE(vpp, vpu, R300_VPP_CMDSIZE, 0);
r300->hw.vpp.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
r300->hw.vpp.emit = emit_vpu;
ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
r300->hw.vps.cmd[0] =
cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
r300->hw.vps.emit = emit_vpu;
for (i = 0; i < 6; i++) {
ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
r300->hw.vpucp[i].cmd[0] =
cmdvpu(r300->radeon.radeonScreen,
R300_PVS_UCP_START + i, 1);
r300->hw.vpucp[i].emit = emit_vpu;
}
}
}
/* Textures */
ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
ALLOC_STATE(tex.size, variable, mtu + 1, 0);
r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
ALLOC_STATE(tex.format, variable, mtu + 1, 0);
r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
ALLOC_STATE(tex.offset, variable, 1, 0);
r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
r300->hw.tex.offset.emit = &emit_tex_offsets;
ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
r300->radeon.hw.is_dirty = GL_TRUE;
r300->radeon.hw.all_dirty = GL_TRUE;
rcommonInitCmdBuf(&r300->radeon);
}

View File

@ -0,0 +1,50 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Nicolai Haehnle <prefect_@gmx.net>
*/
#ifndef __R600_CMDBUF_H__
#define __R600_CMDBUF_H__
#include "r600_context.h"
extern void r300InitCmdBuf(r300ContextPtr r300);
void emit_vpu(GLcontext *ctx, struct radeon_state_atom * atom);
int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom);
void emit_r500fp(GLcontext *ctx, struct radeon_state_atom * atom);
int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom);
int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom);
#endif /* __R600_CMDBUF_H__ */

View File

@ -0,0 +1,472 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
*
* \author Nicolai Haehnle <prefect_@gmx.net>
*/
#include "main/glheader.h"
#include "main/api_arrayelt.h"
#include "main/context.h"
#include "main/simple_list.h"
#include "main/imports.h"
#include "main/matrix.h"
#include "main/extensions.h"
#include "main/state.h"
#include "main/bufferobj.h"
#include "main/texobj.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "tnl/t_vp_build.h"
#include "drivers/common/driverfuncs.h"
#include "r600_context.h"
#include "radeon_context.h"
#include "radeon_span.h"
#include "r600_cmdbuf.h"
#include "r600_state.h"
#include "r600_ioctl.h"
#include "r600_tex.h"
#include "r600_emit.h"
#include "r600_swtcl.h"
#include "radeon_bocs_wrapper.h"
#include "vblank.h"
#include "utils.h"
#include "xmlpool.h" /* for symbolic values of enum-type options */
/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
int future_hw_tcl_on = 1;
int hw_tcl_on = 1;
#define need_GL_VERSION_2_0
#define need_GL_ARB_point_parameters
#define need_GL_ARB_vertex_program
#define need_GL_EXT_blend_equation_separate
#define need_GL_EXT_blend_func_separate
#define need_GL_EXT_blend_minmax
#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_fog_coord
#define need_GL_EXT_gpu_program_parameters
#define need_GL_EXT_secondary_color
#define need_GL_EXT_stencil_two_side
#define need_GL_ATI_separate_stencil
#define need_GL_NV_vertex_program
#include "extension_helper.h"
const struct dri_extension card_extensions[] = {
/* *INDENT-OFF* */
{"GL_ARB_depth_texture", NULL},
{"GL_ARB_fragment_program", NULL},
{"GL_ARB_multitexture", NULL},
{"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
{"GL_ARB_shadow", NULL},
{"GL_ARB_shadow_ambient", NULL},
{"GL_ARB_texture_border_clamp", NULL},
{"GL_ARB_texture_cube_map", NULL},
{"GL_ARB_texture_env_add", NULL},
{"GL_ARB_texture_env_combine", NULL},
{"GL_ARB_texture_env_crossbar", NULL},
{"GL_ARB_texture_env_dot3", NULL},
{"GL_ARB_texture_mirrored_repeat", NULL},
{"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
{"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
{"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
{"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
{"GL_EXT_blend_subtract", NULL},
{"GL_EXT_packed_depth_stencil", NULL},
{"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
{"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
{"GL_EXT_shadow_funcs", NULL},
{"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
{"GL_EXT_stencil_wrap", NULL},
{"GL_EXT_texture_edge_clamp", NULL},
{"GL_EXT_texture_env_combine", NULL},
{"GL_EXT_texture_env_dot3", NULL},
{"GL_EXT_texture_filter_anisotropic", NULL},
{"GL_EXT_texture_lod_bias", NULL},
{"GL_EXT_texture_mirror_clamp", NULL},
{"GL_EXT_texture_rectangle", NULL},
{"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
{"GL_ATI_texture_env_combine3", NULL},
{"GL_ATI_texture_mirror_once", NULL},
{"GL_MESA_pack_invert", NULL},
{"GL_MESA_ycbcr_texture", NULL},
{"GL_MESAX_texture_float", NULL},
{"GL_NV_blend_square", NULL},
{"GL_NV_vertex_program", GL_NV_vertex_program_functions},
{"GL_SGIS_generate_mipmap", NULL},
{NULL, NULL}
/* *INDENT-ON* */
};
const struct dri_extension mm_extensions[] = {
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
{ NULL, NULL }
};
/**
* The GL 2.0 functions are needed to make display lists work with
* functions added by GL_ATI_separate_stencil.
*/
const struct dri_extension gl_20_extension[] = {
{"GL_VERSION_2_0", GL_VERSION_2_0_functions },
};
extern struct tnl_pipeline_stage _r300_render_stage;
extern const struct tnl_pipeline_stage _r300_tcl_stage;
static const struct tnl_pipeline_stage *r300_pipeline[] = {
/* Try and go straight to t&l
*/
&_r300_tcl_stage,
/* Catch any t&l fallbacks
*/
&_tnl_vertex_transform_stage,
&_tnl_normal_transform_stage,
&_tnl_lighting_stage,
&_tnl_fog_coordinate_stage,
&_tnl_texgen_stage,
&_tnl_texture_transform_stage,
&_tnl_vertex_program_stage,
/* Try again to go to tcl?
* - no good for asymmetric-twoside (do with multipass)
* - no good for asymmetric-unfilled (do with multipass)
* - good for material
* - good for texgen
* - need to manipulate a bit of state
*
* - worth it/not worth it?
*/
/* Else do them here.
*/
&_r300_render_stage,
&_tnl_render_stage, /* FALLBACK */
0,
};
static void r300RunPipeline(GLcontext * ctx)
{
_mesa_lock_context_textures(ctx);
if (ctx->NewState)
_mesa_update_state_locked(ctx);
_tnl_run_pipeline(ctx);
_mesa_unlock_context_textures(ctx);
}
static void r300_get_lock(radeonContextPtr rmesa)
{
drm_radeon_sarea_t *sarea = rmesa->sarea;
if (sarea->ctx_owner != rmesa->dri.hwContext) {
sarea->ctx_owner = rmesa->dri.hwContext;
if (!rmesa->radeonScreen->kernel_mm)
radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
}
}
static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
{
/* please flush pipe do all pending work */
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_SC_SCREENDOOR, 1));
radeon_cs_write_dword(cs, 0x0);
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_SC_SCREENDOOR, 1));
radeon_cs_write_dword(cs, 0x00FFFFFF);
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_SC_HYPERZ, 1));
radeon_cs_write_dword(cs, 0x0);
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_US_CONFIG, 1));
radeon_cs_write_dword(cs, 0x0);
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_ZB_CNTL, 1));
radeon_cs_write_dword(cs, 0x0);
radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_RB3D_DSTCACHE_CTLSTAT, 1));
radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
R300_ZB_ZCACHE_CTLSTAT, 1));
radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
R300_WAIT_3D | R300_WAIT_3D_CLEAN));
}
static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
{
r300ContextPtr r300 = (r300ContextPtr)radeon;
BATCH_LOCALS(radeon);
r300->vap_flush_needed = GL_TRUE;
cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
END_BATCH();
end_3d(radeon);
}
static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
if (mode)
r300->radeon.Fallback |= bit;
else
r300->radeon.Fallback &= ~bit;
}
static void r300_init_vtbl(radeonContextPtr radeon)
{
radeon->vtbl.get_lock = r300_get_lock;
radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
radeon->vtbl.swtcl_flush = r300_swtcl_flush;
radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
radeon->vtbl.fallback = r300_fallback;
}
/* Create the device specific rendering context.
*/
GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate)
{
__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
struct dd_function_table functions;
r300ContextPtr r300;
GLcontext *ctx;
int tcl_mode;
assert(glVisual);
assert(driContextPriv);
assert(screen);
/* Allocate the R300 context */
r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
if (!r300)
return GL_FALSE;
if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
hw_tcl_on = future_hw_tcl_on = 0;
r300_init_vtbl(&r300->radeon);
/* Parse configuration files.
* Do this here so that initialMaxAnisotropy is set before we create
* the default textures.
*/
driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
screen->driScreen->myNum, "r300");
r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache,
"def_max_anisotropy");
/* Init default driver functions then plug in our R300-specific functions
* (the texture functions are especially important)
*/
_mesa_init_driver_functions(&functions);
r300InitIoctlFuncs(&functions);
r300InitStateFuncs(&functions);
r300InitTextureFuncs(&functions);
r300InitShaderFuncs(&functions);
if (!radeonInitContext(&r300->radeon, &functions,
glVisual, driContextPriv,
sharedContextPrivate)) {
FREE(r300);
return GL_FALSE;
}
/* Init r300 context data */
/* Set the maximum texture size small enough that we can guarentee that
* all texture units can bind a maximal texture and have them both in
* texturable memory at once.
*/
ctx = r300->radeon.glCtx;
ctx->Const.MaxTextureImageUnits =
driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
ctx->Const.MaxTextureCoordUnits =
driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
ctx->Const.MaxTextureUnits =
MIN2(ctx->Const.MaxTextureImageUnits,
ctx->Const.MaxTextureCoordUnits);
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 16.0;
if (screen->chip_family >= CHIP_FAMILY_RV515) {
ctx->Const.MaxTextureLevels = 13;
ctx->Const.MaxTextureRectSize = 4096;
}
ctx->Const.MinPointSize = 1.0;
ctx->Const.MinPointSizeAA = 1.0;
ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
ctx->Const.MinLineWidth = 1.0;
ctx->Const.MinLineWidthAA = 1.0;
ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
/* Needs further modifications */
#if 0
ctx->Const.MaxArrayLockSize =
( /*512 */ RADEON_BUFFER_SIZE * 16 * 1024) / (4 * 4);
#endif
ctx->Const.MaxDrawBuffers = 1;
/* Initialize the software rasterizer and helper modules.
*/
_swrast_CreateContext(ctx);
_vbo_CreateContext(ctx);
_tnl_CreateContext(ctx);
_swsetup_CreateContext(ctx);
_swsetup_Wakeup(ctx);
_ae_create_context(ctx);
/* Install the customized pipeline:
*/
_tnl_destroy_pipeline(ctx);
_tnl_install_pipeline(ctx, r300_pipeline);
/* Try and keep materials and vertices separate:
*/
/* _tnl_isolate_materials(ctx, GL_TRUE); */
/* Configure swrast and TNL to match hardware characteristics:
*/
_swrast_allow_pixel_fog(ctx, GL_FALSE);
_swrast_allow_vertex_fog(ctx, GL_TRUE);
_tnl_allow_pixel_fog(ctx, GL_FALSE);
_tnl_allow_vertex_fog(ctx, GL_TRUE);
/* currently bogus data */
if (screen->chip_flags & RADEON_CHIPSET_TCL) {
ctx->Const.VertexProgram.MaxInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
ctx->Const.VertexProgram.MaxNativeInstructions =
VSF_MAX_FRAGMENT_LENGTH / 4;
ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
ctx->Const.VertexProgram.MaxTemps = 32;
ctx->Const.VertexProgram.MaxNativeTemps =
/*VSF_MAX_FRAGMENT_TEMPS */ 32;
ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
}
ctx->Const.FragmentProgram.MaxNativeTemps = PFS_NUM_TEMP_REGS;
ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
ctx->Const.FragmentProgram.MaxNativeParameters = PFS_NUM_CONST_REGS;
ctx->Const.FragmentProgram.MaxNativeAluInstructions = PFS_MAX_ALU_INST;
ctx->Const.FragmentProgram.MaxNativeTexInstructions = PFS_MAX_TEX_INST;
ctx->Const.FragmentProgram.MaxNativeInstructions =
PFS_MAX_ALU_INST + PFS_MAX_TEX_INST;
ctx->Const.FragmentProgram.MaxNativeTexIndirections =
PFS_MAX_TEX_INDIRECT;
ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0; /* and these are?? */
ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
driInitExtensions(ctx, card_extensions, GL_TRUE);
if (r300->radeon.radeonScreen->kernel_mm)
driInitExtensions(ctx, mm_extensions, GL_FALSE);
if (driQueryOptionb
(&r300->radeon.optionCache, "disable_stencil_two_side"))
_mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
if (r300->radeon.glCtx->Mesa_DXTn
&& !driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc")) {
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
_mesa_enable_extension(ctx, "GL_S3_s3tc");
} else
if (driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable"))
{
_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
}
r300->disable_lowimpact_fallback =
driQueryOptionb(&r300->radeon.optionCache,
"disable_lowimpact_fallback");
radeon_fbo_init(&r300->radeon);
radeonInitSpanFuncs( ctx );
r300InitCmdBuf(r300);
r300InitState(r300);
if (!(screen->chip_flags & RADEON_CHIPSET_TCL))
r300InitSwtcl(ctx);
TNL_CONTEXT(ctx)->Driver.RunPipeline = r300RunPipeline;
tcl_mode = driQueryOptioni(&r300->radeon.optionCache, "tcl_mode");
if (driQueryOptionb(&r300->radeon.optionCache, "no_rast")) {
fprintf(stderr, "disabling 3D acceleration\n");
#if R200_MERGED
FALLBACK(&r300->radeon, RADEON_FALLBACK_DISABLE, 1);
#endif
}
if (tcl_mode == DRI_CONF_TCL_SW ||
!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL)) {
if (r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
r300->radeon.radeonScreen->chip_flags &=
~RADEON_CHIPSET_TCL;
fprintf(stderr, "Disabling HW TCL support\n");
}
TCL_FALLBACK(r300->radeon.glCtx,
RADEON_TCL_FALLBACK_TCL_DISABLE, 1);
}
return GL_TRUE;
}

View File

@ -0,0 +1,716 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
* \author Nicolai Haehnle <prefect_@gmx.net>
*/
#ifndef __R600_CONTEXT_H__
#define __R600_CONTEXT_H__
#include "tnl/t_vertex.h"
#include "drm.h"
#include "radeon_drm.h"
#include "dri_util.h"
#include "texmem.h"
#include "radeon_common.h"
#include "main/macros.h"
#include "main/mtypes.h"
#include "main/colormac.h"
struct r300_context;
typedef struct r300_context r300ContextRec;
typedef struct r300_context *r300ContextPtr;
#include "main/mm.h"
/* From http://gcc. gnu.org/onlinedocs/gcc-3.2.3/gcc/Variadic-Macros.html .
I suppose we could inline this and use macro to fetch out __LINE__ and stuff in case we run into trouble
with other compilers ... GLUE!
*/
#define WARN_ONCE(a, ...) { \
static int warn##__LINE__=1; \
if(warn##__LINE__){ \
fprintf(stderr, "*********************************WARN_ONCE*********************************\n"); \
fprintf(stderr, "File %s function %s line %d\n", \
__FILE__, __FUNCTION__, __LINE__); \
fprintf(stderr, a, ## __VA_ARGS__);\
fprintf(stderr, "***************************************************************************\n"); \
warn##__LINE__=0;\
} \
}
#include "r600_vertprog.h"
#include "r700_fragprog.h"
/************ DMA BUFFERS **************/
/* The blit width for texture uploads
*/
#define R300_BLIT_WIDTH_BYTES 1024
#define R300_MAX_TEXTURE_UNITS 8
struct r300_texture_state {
int tc_count; /* number of incoming texture coordinates from VAP */
};
#define R300_VPT_CMD_0 0
#define R300_VPT_XSCALE 1
#define R300_VPT_XOFFSET 2
#define R300_VPT_YSCALE 3
#define R300_VPT_YOFFSET 4
#define R300_VPT_ZSCALE 5
#define R300_VPT_ZOFFSET 6
#define R300_VPT_CMDSIZE 7
#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */
#define R300_VIR_CNTL_0 1
#define R300_VIR_CNTL_1 2
#define R300_VIR_CNTL_2 3
#define R300_VIR_CNTL_3 4
#define R300_VIR_CNTL_4 5
#define R300_VIR_CNTL_5 6
#define R300_VIR_CNTL_6 7
#define R300_VIR_CNTL_7 8
#define R300_VIR_CMDSIZE 9
#define R300_VIC_CMD_0 0
#define R300_VIC_CNTL_0 1
#define R300_VIC_CNTL_1 2
#define R300_VIC_CMDSIZE 3
#define R300_VOF_CMD_0 0
#define R300_VOF_CNTL_0 1
#define R300_VOF_CNTL_1 2
#define R300_VOF_CMDSIZE 3
#define R300_PVS_CMD_0 0
#define R300_PVS_CNTL_1 1
#define R300_PVS_CNTL_2 2
#define R300_PVS_CNTL_3 3
#define R300_PVS_CMDSIZE 4
#define R300_GB_MISC_CMD_0 0
#define R300_GB_MISC_MSPOS_0 1
#define R300_GB_MISC_MSPOS_1 2
#define R300_GB_MISC_TILE_CONFIG 3
#define R300_GB_MISC_SELECT 4
#define R300_GB_MISC_AA_CONFIG 5
#define R300_GB_MISC_CMDSIZE 6
#define R300_TXE_CMD_0 0
#define R300_TXE_ENABLE 1
#define R300_TXE_CMDSIZE 2
#define R300_PS_CMD_0 0
#define R300_PS_POINTSIZE 1
#define R300_PS_CMDSIZE 2
#define R300_ZBS_CMD_0 0
#define R300_ZBS_T_FACTOR 1
#define R300_ZBS_T_CONSTANT 2
#define R300_ZBS_W_FACTOR 3
#define R300_ZBS_W_CONSTANT 4
#define R300_ZBS_CMDSIZE 5
#define R300_CUL_CMD_0 0
#define R300_CUL_CULL 1
#define R300_CUL_CMDSIZE 2
#define R300_RC_CMD_0 0
#define R300_RC_CNTL_0 1
#define R300_RC_CNTL_1 2
#define R300_RC_CMDSIZE 3
#define R300_RI_CMD_0 0
#define R300_RI_INTERP_0 1
#define R300_RI_INTERP_1 2
#define R300_RI_INTERP_2 3
#define R300_RI_INTERP_3 4
#define R300_RI_INTERP_4 5
#define R300_RI_INTERP_5 6
#define R300_RI_INTERP_6 7
#define R300_RI_INTERP_7 8
#define R300_RI_CMDSIZE 9
#define R500_RI_CMDSIZE 17
#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */
#define R300_RR_INST_0 1
#define R300_RR_INST_1 2
#define R300_RR_INST_2 3
#define R300_RR_INST_3 4
#define R300_RR_INST_4 5
#define R300_RR_INST_5 6
#define R300_RR_INST_6 7
#define R300_RR_INST_7 8
#define R300_RR_CMDSIZE 9
#define R300_FP_CMD_0 0
#define R300_FP_CNTL0 1
#define R300_FP_CNTL1 2
#define R300_FP_CNTL2 3
#define R300_FP_CMD_1 4
#define R300_FP_NODE0 5
#define R300_FP_NODE1 6
#define R300_FP_NODE2 7
#define R300_FP_NODE3 8
#define R300_FP_CMDSIZE 9
#define R500_FP_CMD_0 0
#define R500_FP_CNTL 1
#define R500_FP_PIXSIZE 2
#define R500_FP_CMD_1 3
#define R500_FP_CODE_ADDR 4
#define R500_FP_CODE_RANGE 5
#define R500_FP_CODE_OFFSET 6
#define R500_FP_CMD_2 7
#define R500_FP_FC_CNTL 8
#define R500_FP_CMDSIZE 9
#define R300_FPT_CMD_0 0
#define R300_FPT_INSTR_0 1
#define R300_FPT_CMDSIZE 65
#define R300_FPI_CMD_0 0
#define R300_FPI_INSTR_0 1
#define R300_FPI_CMDSIZE 65
/* R500 has space for 512 instructions - 6 dwords per instruction */
#define R500_FPI_CMDSIZE (512*6+1)
#define R300_FPP_CMD_0 0
#define R300_FPP_PARAM_0 1
#define R300_FPP_CMDSIZE (32*4+1)
/* R500 has spcae for 256 constants - 4 dwords per constant */
#define R500_FPP_CMDSIZE (256*4+1)
#define R300_FOGS_CMD_0 0
#define R300_FOGS_STATE 1
#define R300_FOGS_CMDSIZE 2
#define R300_FOGC_CMD_0 0
#define R300_FOGC_R 1
#define R300_FOGC_G 2
#define R300_FOGC_B 3
#define R300_FOGC_CMDSIZE 4
#define R300_FOGP_CMD_0 0
#define R300_FOGP_SCALE 1
#define R300_FOGP_START 2
#define R300_FOGP_CMDSIZE 3
#define R300_AT_CMD_0 0
#define R300_AT_ALPHA_TEST 1
#define R300_AT_UNKNOWN 2
#define R300_AT_CMDSIZE 3
#define R300_BLD_CMD_0 0
#define R300_BLD_CBLEND 1
#define R300_BLD_ABLEND 2
#define R300_BLD_CMDSIZE 3
#define R300_CMK_CMD_0 0
#define R300_CMK_COLORMASK 1
#define R300_CMK_CMDSIZE 2
#define R300_CB_CMD_0 0
#define R300_CB_OFFSET 1
#define R300_CB_CMD_1 2
#define R300_CB_PITCH 3
#define R300_CB_CMDSIZE 4
#define R300_ZS_CMD_0 0
#define R300_ZS_CNTL_0 1
#define R300_ZS_CNTL_1 2
#define R300_ZS_CNTL_2 3
#define R300_ZS_CMDSIZE 4
#define R300_ZB_CMD_0 0
#define R300_ZB_OFFSET 1
#define R300_ZB_PITCH 2
#define R300_ZB_CMDSIZE 3
#define R300_VAP_CNTL_FLUSH 0
#define R300_VAP_CNTL_FLUSH_1 1
#define R300_VAP_CNTL_CMD 2
#define R300_VAP_CNTL_INSTR 3
#define R300_VAP_CNTL_SIZE 4
#define R300_VPI_CMD_0 0
#define R300_VPI_INSTR_0 1
#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */
#define R300_VPP_CMD_0 0
#define R300_VPP_PARAM_0 1
#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */
#define R300_VPUCP_CMD_0 0
#define R300_VPUCP_X 1
#define R300_VPUCP_Y 2
#define R300_VPUCP_Z 3
#define R300_VPUCP_W 4
#define R300_VPUCP_CMDSIZE 5 /* 256 4-component parameters */
#define R300_VPS_CMD_0 0
#define R300_VPS_ZERO_0 1
#define R300_VPS_ZERO_1 2
#define R300_VPS_POINTSIZE 3
#define R300_VPS_ZERO_3 4
#define R300_VPS_CMDSIZE 5
/* the layout is common for all fields inside tex */
#define R300_TEX_CMD_0 0
#define R300_TEX_VALUE_0 1
/* We don't really use this, instead specify mtu+1 dynamically
#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1)
*/
/**
* Cache for hardware register state.
*/
struct r300_hw_state {
struct radeon_state_atom vpt; /* viewport (1D98) */
struct radeon_state_atom vap_cntl;
struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
struct radeon_state_atom vof; /* VAP output format register 0x2090 */
struct radeon_state_atom vte; /* (20B0) */
struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
struct radeon_state_atom vap_cntl_status;
struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */
struct radeon_state_atom vic; /* vap input control (2180) */
struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
struct radeon_state_atom vap_clip_cntl;
struct radeon_state_atom vap_clip;
struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
struct radeon_state_atom pvs; /* pvs_cntl (22D0) */
struct radeon_state_atom gb_enable; /* (4008) */
struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
struct radeon_state_atom ga_triangle_stipple; /* (4214) */
struct radeon_state_atom ps; /* pointsize (421C) */
struct radeon_state_atom ga_point_minmax; /* (4230) */
struct radeon_state_atom lcntl; /* line control */
struct radeon_state_atom ga_line_stipple; /* (4260) */
struct radeon_state_atom shade;
struct radeon_state_atom polygon_mode;
struct radeon_state_atom fogp; /* fog parameters (4294) */
struct radeon_state_atom ga_soft_reset; /* (429C) */
struct radeon_state_atom zbias_cntl;
struct radeon_state_atom zbs; /* zbias (42A4) */
struct radeon_state_atom occlusion_cntl;
struct radeon_state_atom cul; /* cull cntl (42B8) */
struct radeon_state_atom su_depth_scale; /* (42C0) */
struct radeon_state_atom rc; /* rs control (4300) */
struct radeon_state_atom ri; /* rs interpolators (4310) */
struct radeon_state_atom rr; /* rs route (4330) */
struct radeon_state_atom sc_hyperz; /* (43A4) */
struct radeon_state_atom sc_screendoor; /* (43E8) */
struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */
struct radeon_state_atom fpt; /* texi - (4620) */
struct radeon_state_atom us_out_fmt; /* (46A4) */
struct radeon_state_atom r500fp; /* r500 fp instructions */
struct radeon_state_atom r500fp_const; /* r500 fp constants */
struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
struct radeon_state_atom fogs; /* fog state (4BC0) */
struct radeon_state_atom fogc; /* fog color (4BC8) */
struct radeon_state_atom at; /* alpha test (4BD4) */
struct radeon_state_atom fg_depth_src; /* (4BD8) */
struct radeon_state_atom fpp; /* 0x4C00 and following */
struct radeon_state_atom rb3d_cctl; /* (4E00) */
struct radeon_state_atom bld; /* blending (4E04) */
struct radeon_state_atom cmk; /* colormask (4E0C) */
struct radeon_state_atom blend_color; /* constant blend color */
struct radeon_state_atom rop; /* ropcntl */
struct radeon_state_atom cb; /* colorbuffer (4E28) */
struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */
struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
struct radeon_state_atom zs; /* zstencil control (4F00) */
struct radeon_state_atom zstencil_format;
struct radeon_state_atom zb; /* z buffer (4F20) */
struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
struct radeon_state_atom unk4F30; /* (4F30) */
struct radeon_state_atom zb_hiz_offset; /* (4F44) */
struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
struct radeon_state_atom vpi; /* vp instructions */
struct radeon_state_atom vpp; /* vp parameters */
struct radeon_state_atom vps; /* vertex point size (?) */
struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */
/* 8 texture units */
/* the state is grouped by function and not by
texture unit. This makes single unit updates
really awkward - we are much better off
updating the whole thing at once */
struct {
struct radeon_state_atom filter;
struct radeon_state_atom filter_1;
struct radeon_state_atom size;
struct radeon_state_atom format;
struct radeon_state_atom pitch;
struct radeon_state_atom offset;
struct radeon_state_atom chroma_key;
struct radeon_state_atom border_color;
} tex;
struct radeon_state_atom txe; /* tex enable (4104) */
radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
};
/**
* State cache
*/
/* Vertex shader state */
/* Perhaps more if we store programs in vmem? */
/* drm_r300_cmd_header_t->vpu->count is unsigned char */
#define VSF_MAX_FRAGMENT_LENGTH (255*4)
/* Can be tested with colormat currently. */
#define VSF_MAX_FRAGMENT_TEMPS (14)
#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
#define STATE_R300_TEXRECT_FACTOR (STATE_INTERNAL_DRIVER+1)
struct r300_vertex_shader_fragment {
int length;
union {
GLuint d[VSF_MAX_FRAGMENT_LENGTH];
float f[VSF_MAX_FRAGMENT_LENGTH];
GLuint i[VSF_MAX_FRAGMENT_LENGTH];
} body;
};
struct r300_vertex_shader_state {
struct r300_vertex_shader_fragment program;
};
extern int hw_tcl_on;
#define COLOR_IS_RGBA
#define TAG(x) r300##x
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
//#define CURRENT_VERTEX_SHADER(ctx) (ctx->VertexProgram._Current)
#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->selected_vp)
/* Should but doesnt work */
//#define CURRENT_VERTEX_SHADER(ctx) (R300_CONTEXT(ctx)->curr_vp)
/* r300_vertex_shader_state and r300_vertex_program should probably be merged together someday.
* Keeping them them seperate for now should ensure fixed pipeline keeps functioning properly.
*/
struct r300_vertex_program_key {
GLuint InputsRead;
GLuint OutputsWritten;
GLuint OutputsAdded;
};
struct r300_vertex_program {
struct r300_vertex_program *next;
struct r300_vertex_program_key key;
int translated;
struct r300_vertex_shader_fragment program;
int pos_end;
int num_temporaries; /* Number of temp vars used by program */
int wpos_idx;
int inputs[VERT_ATTRIB_MAX];
int outputs[VERT_RESULT_MAX];
int native;
int ref_count;
int use_ref_count;
};
struct r300_vertex_program_cont {
struct gl_vertex_program mesa_program; /* Must be first */
struct r300_vertex_shader_fragment params;
struct r300_vertex_program *progs;
};
#define PFS_MAX_ALU_INST 64
#define PFS_MAX_TEX_INST 64
#define PFS_MAX_TEX_INDIRECT 4
#define PFS_NUM_TEMP_REGS 32
#define PFS_NUM_CONST_REGS 16
struct r300_pfs_compile_state;
/**
* Stores state that influences the compilation of a fragment program.
*/
struct r300_fragment_program_external_state {
struct {
/**
* If the sampler is used as a shadow sampler,
* this field is:
* 0 - GL_LUMINANCE
* 1 - GL_INTENSITY
* 2 - GL_ALPHA
* depending on the depth texture mode.
*/
GLuint depth_texture_mode : 2;
/**
* If the sampler is used as a shadow sampler,
* this field is (texture_compare_func - GL_NEVER).
* [e.g. if compare function is GL_LEQUAL, this field is 3]
*
* Otherwise, this field is 0.
*/
GLuint texture_compare_func : 3;
} unit[16];
};
struct r300_fragment_program_node {
int tex_offset; /**< first tex instruction */
int tex_end; /**< last tex instruction, relative to tex_offset */
int alu_offset; /**< first ALU instruction */
int alu_end; /**< last ALU instruction, relative to alu_offset */
int flags;
};
/**
* Stores an R300 fragment program in its compiled-to-hardware form.
*/
struct r300_fragment_program_code {
struct {
int length; /**< total # of texture instructions used */
GLuint inst[PFS_MAX_TEX_INST];
} tex;
struct {
int length; /**< total # of ALU instructions used */
struct {
GLuint inst0;
GLuint inst1;
GLuint inst2;
GLuint inst3;
} inst[PFS_MAX_ALU_INST];
} alu;
struct r300_fragment_program_node node[4];
int cur_node;
int first_node_has_tex;
/**
* Remember which program register a given hardware constant
* belongs to.
*/
struct prog_src_register constant[PFS_NUM_CONST_REGS];
int const_nr;
int max_temp_idx;
};
/**
* Store everything about a fragment program that is needed
* to render with that program.
*/
struct r300_fragment_program {
struct gl_fragment_program mesa_program;
GLboolean translated;
GLboolean error;
struct r300_fragment_program_external_state state;
struct r300_fragment_program_code code;
GLboolean WritesDepth;
GLuint optimization;
};
struct r500_pfs_compile_state;
struct r500_fragment_program_external_state {
struct {
/**
* If the sampler is used as a shadow sampler,
* this field is:
* 0 - GL_LUMINANCE
* 1 - GL_INTENSITY
* 2 - GL_ALPHA
* depending on the depth texture mode.
*/
GLuint depth_texture_mode : 2;
/**
* If the sampler is used as a shadow sampler,
* this field is (texture_compare_func - GL_NEVER).
* [e.g. if compare function is GL_LEQUAL, this field is 3]
*
* Otherwise, this field is 0.
*/
GLuint texture_compare_func : 3;
} unit[16];
};
struct r500_fragment_program_code {
struct {
GLuint inst0;
GLuint inst1;
GLuint inst2;
GLuint inst3;
GLuint inst4;
GLuint inst5;
} inst[512];
int inst_offset;
int inst_end;
/**
* Remember which program register a given hardware constant
* belongs to.
*/
struct prog_src_register constant[PFS_NUM_CONST_REGS];
int const_nr;
int max_temp_idx;
};
struct r500_fragment_program {
struct gl_fragment_program mesa_program;
GLcontext *ctx;
GLboolean translated;
GLboolean error;
struct r500_fragment_program_external_state state;
struct r500_fragment_program_code code;
GLboolean writes_depth;
GLuint optimization;
};
#define R300_MAX_AOS_ARRAYS 16
#define REG_COORDS 0
#define REG_COLOR0 1
#define REG_TEX0 2
struct r300_state {
struct r300_texture_state texture;
int sw_tcl_inputs[VERT_ATTRIB_MAX];
struct r300_vertex_shader_state vertex_shader;
DECLARE_RENDERINPUTS(render_inputs_bitset); /* actual render inputs that R300 was configured for.
They are the same as tnl->render_inputs for fixed pipeline */
};
#define R300_FALLBACK_NONE 0
#define R300_FALLBACK_TCL 1
#define R300_FALLBACK_RAST 2
/* r300_swtcl.c
*/
struct r300_swtcl_info {
/*
* Offset of the 4UB color data within a hardware (swtcl) vertex.
*/
GLuint coloroffset;
/**
* Offset of the 3UB specular color data within a hardware (swtcl) vertex.
*/
GLuint specoffset;
struct vertex_attribute{
GLuint attr;
GLubyte format;
GLubyte dst_loc;
GLuint swizzle;
GLubyte write_mask;
} vert_attrs[VERT_ATTRIB_MAX];
GLubyte vertex_attr_count;
};
/**
* \brief R300 context structure.
*/
struct r300_context {
struct radeon_context radeon; /* parent class, must be first */
struct r300_hw_state hw;
struct r300_state state;
struct gl_vertex_program *curr_vp;
struct r300_vertex_program *selected_vp;
/* Vertex buffers
*/
GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
GLboolean disable_lowimpact_fallback;
struct r300_swtcl_info swtcl;
GLboolean vap_flush_needed;
};
#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx))
extern void r300DestroyContext(__DRIcontextPrivate * driContextPriv);
extern GLboolean r300CreateContext(const __GLcontextModes * glVisual,
__DRIcontextPrivate * driContextPriv,
void *sharedContextPrivate);
extern void r300SelectVertexShader(r300ContextPtr r300);
extern void r300InitShaderFuncs(struct dd_function_table *functions);
extern int r300VertexProgUpdateParams(GLcontext * ctx,
struct r300_vertex_program_cont *vp,
float *dst);
#define RADEON_D_CAPTURE 0
#define RADEON_D_PLAYBACK 1
#define RADEON_D_PLAYBACK_RAW 2
#define RADEON_D_T 3
#define r300PackFloat32 radeonPackFloat32
#define r300PackFloat24 radeonPackFloat24
#endif /* __R300_CONTEXT_H__ */

View File

@ -0,0 +1,364 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/mtypes.h"
#include "main/colormac.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/image.h"
#include "swrast_setup/swrast_setup.h"
#include "math/m_translate.h"
#include "tnl/tnl.h"
#include "tnl/t_context.h"
#include "r600_context.h"
#include "r600_state.h"
#include "r600_emit.h"
#include "r600_ioctl.h"
#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
#error Cannot change these!
#endif
#define DEBUG_ALL DEBUG_VERTS
#define DW_SIZE(x) ((inputs[tab[(x)]] << R300_DST_VEC_LOC_SHIFT) | \
(attribptr[tab[(x)]]->size - 1) << R300_DATA_TYPE_0_SHIFT)
GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
int *inputs, GLint * tab, GLuint nr)
{
GLuint i, dw;
/* type, inputs, stop bit, size */
for (i = 0; i < nr; i += 2) {
/* make sure input is valid, would lockup the gpu */
assert(inputs[tab[i]] != -1);
dw = (R300_SIGNED | DW_SIZE(i));
if (i + 1 == nr) {
dw |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
} else {
assert(inputs[tab[i + 1]] != -1);
dw |= (R300_SIGNED |
DW_SIZE(i + 1)) << R300_DATA_TYPE_1_SHIFT;
if (i + 2 == nr) {
dw |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
}
}
dst[i >> 1] = dw;
}
return (nr + 1) >> 1;
}
static GLuint r300VAPInputRoute1Swizzle(int swizzle[4])
{
return (swizzle[0] << R300_SWIZZLE_SELECT_X_SHIFT) |
(swizzle[1] << R300_SWIZZLE_SELECT_Y_SHIFT) |
(swizzle[2] << R300_SWIZZLE_SELECT_Z_SHIFT) |
(swizzle[3] << R300_SWIZZLE_SELECT_W_SHIFT);
}
GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr)
{
GLuint i, dw;
for (i = 0; i < nr; i += 2) {
dw = (r300VAPInputRoute1Swizzle(swizzle[i]) |
((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE0_SHIFT;
if (i + 1 < nr) {
dw |= (r300VAPInputRoute1Swizzle(swizzle[i + 1]) |
((R300_WRITE_ENA_X | R300_WRITE_ENA_Y |
R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
}
dst[i >> 1] = dw;
}
return (nr + 1) >> 1;
}
GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
{
/* No idea what this value means. I have seen other values written to
* this register... */
return 0x5555;
}
GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint i, vic_1 = 0;
if (InputsRead & (1 << VERT_ATTRIB_POS))
vic_1 |= R300_INPUT_CNTL_POS;
if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
vic_1 |= R300_INPUT_CNTL_NORMAL;
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
vic_1 |= R300_INPUT_CNTL_COLOR;
rmesa->state.texture.tc_count = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
rmesa->state.texture.tc_count++;
vic_1 |= R300_INPUT_CNTL_TC0 << i;
}
return vic_1;
}
GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
{
GLuint ret = 0;
if (OutputsWritten & (1 << VERT_RESULT_HPOS))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
if (OutputsWritten & (1 << VERT_RESULT_COL0))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
if (OutputsWritten & (1 << VERT_RESULT_COL1))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
if (OutputsWritten & (1 << VERT_RESULT_BFC0)
|| OutputsWritten & (1 << VERT_RESULT_BFC1))
ret |=
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
return ret;
}
GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
{
GLuint i, ret = 0, first_free_texcoord = 0;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
ret |= (4 << (3 * i));
++first_free_texcoord;
}
}
if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
if (first_free_texcoord > 8) {
fprintf(stderr, "\tout of free texcoords to write fog coord\n");
_mesa_exit(-1);
}
ret |= 4 << (3 * first_free_texcoord);
}
return ret;
}
/* Emit vertex data to GART memory
* Route inputs to the vertex processor
* This function should never return R300_FALLBACK_TCL when using software tcl.
*/
int r300EmitArrays(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *vb = &tnl->vb;
GLuint nr;
GLuint count = vb->Count;
GLuint i;
GLuint InputsRead = 0, OutputsWritten = 0;
int *inputs = NULL;
int vir_inputs[VERT_ATTRIB_MAX];
GLint tab[VERT_ATTRIB_MAX];
int swizzle[VERT_ATTRIB_MAX][4];
struct r300_vertex_program *prog =
(struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
if (hw_tcl_on) {
inputs = prog->inputs;
InputsRead = prog->key.InputsRead;
OutputsWritten = prog->key.OutputsWritten;
} else {
inputs = rmesa->state.sw_tcl_inputs;
DECLARE_RENDERINPUTS(render_inputs_bitset);
RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
vb->AttribPtr[VERT_ATTRIB_POS] = vb->ClipPtr;
assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS));
assert(RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_NORMAL) == 0);
if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_POS)) {
InputsRead |= 1 << VERT_ATTRIB_POS;
OutputsWritten |= 1 << VERT_RESULT_HPOS;
}
if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
InputsRead |= 1 << VERT_ATTRIB_COLOR0;
OutputsWritten |= 1 << VERT_RESULT_COL0;
}
if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_COLOR1)) {
InputsRead |= 1 << VERT_ATTRIB_COLOR1;
OutputsWritten |= 1 << VERT_RESULT_COL1;
}
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (RENDERINPUTS_TEST(render_inputs_bitset, _TNL_ATTRIB_TEX(i))) {
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
}
}
for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
if (InputsRead & (1 << i)) {
inputs[i] = nr++;
} else {
inputs[i] = -1;
}
}
/* Fixed, apply to vir0 only */
memcpy(vir_inputs, inputs, VERT_ATTRIB_MAX * sizeof(int));
inputs = vir_inputs;
if (InputsRead & VERT_ATTRIB_POS)
inputs[VERT_ATTRIB_POS] = 0;
if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
inputs[VERT_ATTRIB_COLOR0] = 2;
if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
inputs[VERT_ATTRIB_COLOR1] = 3;
for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
if (InputsRead & (1 << i))
inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, render_inputs_bitset);
}
assert(InputsRead);
assert(OutputsWritten);
for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
if (InputsRead & (1 << i)) {
tab[nr++] = i;
}
}
if (nr > R300_MAX_AOS_ARRAYS) {
return R300_FALLBACK_TCL;
}
for (i = 0; i < nr; i++) {
int ci;
swizzle[i][0] = SWIZZLE_ZERO;
swizzle[i][1] = SWIZZLE_ZERO;
swizzle[i][2] = SWIZZLE_ZERO;
swizzle[i][3] = SWIZZLE_ONE;
for (ci = 0; ci < vb->AttribPtr[tab[i]]->size; ci++) {
swizzle[i][ci] = ci;
}
rcommon_emit_vector(ctx, &rmesa->radeon.tcl.aos[i],
vb->AttribPtr[tab[i]]->data,
vb->AttribPtr[tab[i]]->size,
vb->AttribPtr[tab[i]]->stride, count);
}
/* Setup INPUT_ROUTE. */
if (rmesa->radeon.radeonScreen->kernel_mm) {
R300_STATECHANGE(rmesa, vir[0]);
rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
rmesa->hw.vir[0].cmd[0] |=
(r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
vb->AttribPtr, inputs, tab, nr) & 0x3FFF) << 16;
R300_STATECHANGE(rmesa, vir[1]);
rmesa->hw.vir[1].cmd[0] |=
(r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
nr) & 0x3FFF) << 16;
} else {
R300_STATECHANGE(rmesa, vir[0]);
((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count =
r300VAPInputRoute0(&rmesa->hw.vir[0].cmd[R300_VIR_CNTL_0],
vb->AttribPtr, inputs, tab, nr);
R300_STATECHANGE(rmesa, vir[1]);
((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count =
r300VAPInputRoute1(&rmesa->hw.vir[1].cmd[R300_VIR_CNTL_0], swizzle,
nr);
}
/* Setup INPUT_CNTL. */
R300_STATECHANGE(rmesa, vic);
rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
/* Setup OUTPUT_VTX_FMT. */
R300_STATECHANGE(rmesa, vof);
rmesa->hw.vof.cmd[R300_VOF_CNTL_0] =
r300VAPOutputCntl0(ctx, OutputsWritten);
rmesa->hw.vof.cmd[R300_VOF_CNTL_1] =
r300VAPOutputCntl1(ctx, OutputsWritten);
rmesa->radeon.tcl.aos_count = nr;
return R300_FALLBACK_NONE;
}
void r300EmitCacheFlush(r300ContextPtr rmesa)
{
BATCH_LOCALS(&rmesa->radeon);
BEGIN_BATCH_NO_AUTOSTATE(4);
OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
END_BATCH();
COMMIT_BATCH();
}

View File

@ -0,0 +1,234 @@
/*
* Copyright (C) 2005 Vladimir Dergachev.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Vladimir Dergachev <volodya@mindspring.com>
* Nicolai Haehnle <prefect_@gmx.net>
* Aapo Tahkola <aet@rasterburn.org>
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
/* This files defines functions for accessing R300 hardware.
*/
#ifndef __R600_EMIT_H__
#define __R600_EMIT_H__
#include "main/glheader.h"
#include "r600_context.h"
#include "r600_cmdbuf.h"
#include "radeon_reg.h"
static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
int reg, int count)
{
if (!rscrn->kernel_mm) {
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.packet0.cmd_type = R300_CMD_PACKET0;
cmd.packet0.count = count;
cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
return cmd.u;
}
if (count) {
return CP_PACKET0(reg, count - 1);
}
return CP_PACKET2;
}
static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.vpu.cmd_type = R300_CMD_VPU;
cmd.vpu.count = count;
cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF);
return cmd.u;
}
static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
int addr, int count, int type, int clamp)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.r500fp.cmd_type = R300_CMD_R500FP;
cmd.r500fp.count = count;
cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0;
cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0;
cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF);
return cmd.u;
}
static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.packet3.cmd_type = R300_CMD_PACKET3;
cmd.packet3.packet = packet;
return cmd.u;
}
static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
unsigned short count)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.delay.cmd_type = R300_CMD_CP_DELAY;
cmd.delay.count = count;
return cmd.u;
}
static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
unsigned char flags)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.wait.cmd_type = R300_CMD_WAIT;
cmd.wait.flags = flags;
return cmd.u;
}
static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
{
drm_r300_cmd_header_t cmd;
cmd.u = 0;
cmd.header.cmd_type = R300_CMD_END3D;
return cmd.u;
}
/**
* Write the header of a packet3 to the command buffer.
* Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
*/
#define OUT_BATCH_PACKET3(packet, num_extra) do {\
if (!b_l_rmesa->radeonScreen->kernel_mm) { \
OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
R300_CMD_PACKET3_RAW)); \
} else b_l_rmesa->cmdbuf.cs->section_cdw++;\
OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
} while(0)
/**
* Must be sent to switch to 2d commands
*/
void static INLINE end_3d(radeonContextPtr radeon)
{
BATCH_LOCALS(radeon);
if (!radeon->radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(1);
OUT_BATCH(cmdpacify(radeon->radeonScreen));
END_BATCH();
}
}
void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
{
BATCH_LOCALS(&rmesa->radeon);
if (!rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(1);
OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
END_BATCH();
}
}
void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
{
BATCH_LOCALS(radeon);
uint32_t wait_until;
if (!radeon->radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(1);
OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
END_BATCH();
} else {
switch(flags) {
case R300_WAIT_2D:
wait_until = (1 << 14);
break;
case R300_WAIT_3D:
wait_until = (1 << 15);
break;
case R300_NEW_WAIT_2D_3D:
wait_until = (1 << 14) | (1 << 15);
break;
case R300_NEW_WAIT_2D_2D_CLEAN:
wait_until = (1 << 14) | (1 << 16) | (1 << 18);
break;
case R300_NEW_WAIT_3D_3D_CLEAN:
wait_until = (1 << 15) | (1 << 17) | (1 << 18);
break;
case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
wait_until = (1 << 14) | (1 << 16) | (1 << 18);
wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
break;
default:
return;
}
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
OUT_BATCH(wait_until);
END_BATCH();
}
}
extern int r300EmitArrays(GLcontext * ctx);
extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
extern void r300EmitCacheFlush(r300ContextPtr rmesa);
extern GLuint r300VAPInputRoute0(uint32_t * dst, GLvector4f ** attribptr,
int *inputs, GLint * tab, GLuint nr);
extern GLuint r300VAPInputRoute1(uint32_t * dst, int swizzle[][4], GLuint nr);
extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten);
extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten);
#endif

View File

@ -0,0 +1,699 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*
* Fragment program compiler. Perform transformations on the intermediate
* representation until the program is in a form where we can translate
* it more or less directly into machine-readable form.
*
* \author Ben Skeggs <darktama@iinet.net.au>
* \author Jerome Glisse <j.glisse@gmail.com>
*/
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_instruction.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "r600_context.h"
#include "r600_fragprog.h"
#include "r600_fragprog_swizzle.h"
#include "r600_state.h"
#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
static void reset_srcreg(struct prog_src_register* reg)
{
_mesa_bzero(reg, sizeof(*reg));
reg->Swizzle = SWIZZLE_NOOP;
}
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
};
struct prog_src_register reg = { 0, };
fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
* - premultiply texture coordinates for RECT
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*
* \todo If/when r5xx uses the radeon_program architecture, this can probably
* be reused.
*/
static GLboolean transform_TEX(
struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
{
struct r300_fragment_program_compiler *compiler =
(struct r300_fragment_program_compiler*)data;
struct prog_instruction inst = *orig_inst;
struct prog_instruction* tgt;
GLboolean destredirect = GL_FALSE;
if (inst.Opcode != OPCODE_TEX &&
inst.Opcode != OPCODE_TXB &&
inst.Opcode != OPCODE_TXP &&
inst.Opcode != OPCODE_KIL)
return GL_FALSE;
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
}
return GL_TRUE;
}
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
}
/* Hardware uses [0..1]x[0..1] range for rectangle textures
* instead of [0..Width]x[0..Height].
* Add a scaling instruction.
*/
if (inst.Opcode != OPCODE_KIL && inst.TexSrcTarget == TEXTURE_RECT_INDEX) {
gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_TEXRECT_FACTOR, 0, 0,
0
};
int tempreg = radeonFindFreeTemporary(t);
int factor_index;
tokens[2] = inst.TexSrcUnit;
factor_index = _mesa_add_state_reference(t->Program->Parameters, tokens);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MUL;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tempreg;
tgt->SrcReg[0] = inst.SrcReg[0];
tgt->SrcReg[1].File = PROGRAM_STATE_VAR;
tgt->SrcReg[1].Index = factor_index;
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tempreg;
}
if (inst.Opcode != OPCODE_KIL) {
if (inst.DstReg.File != PROGRAM_TEMPORARY ||
inst.DstReg.WriteMask != WRITEMASK_XYZW) {
int tempreg = radeonFindFreeTemporary(t);
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = tempreg;
inst.DstReg.WriteMask = WRITEMASK_XYZW;
destredirect = GL_TRUE;
}
}
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
int tmpreg = radeonFindFreeTemporary(t);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tmpreg;
tgt->SrcReg[0] = inst.SrcReg[0];
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tmpreg;
}
tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
int rcptemp = radeonFindFreeTemporary(t);
int pass, fail;
tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
tgt[0].DstReg.Index = rcptemp;
tgt[0].DstReg.WriteMask = WRITEMASK_W;
tgt[0].SrcReg[0] = inst.SrcReg[0];
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
tgt[1].Opcode = OPCODE_MAD;
tgt[1].DstReg = inst.DstReg;
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
tgt[1].SrcReg[0] = inst.SrcReg[0];
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[1].Index = rcptemp;
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
else
tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
tgt[2].Opcode = OPCODE_CMP;
tgt[2].DstReg = orig_inst->DstReg;
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
} else if (destredirect) {
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
tgt->SrcReg[0].Index = inst.DstReg.Index;
}
return GL_TRUE;
}
static void update_params(r300ContextPtr r300, struct r300_fragment_program *fp)
{
struct gl_fragment_program *mp = &fp->mesa_program;
/* Ask Mesa nicely to fill in ParameterValues for us */
if (mp->Base.Parameters)
_mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
}
/**
* Transform the program to support fragment.position.
*
* Introduce a small fragment at the start of the program that will be
* the only code that directly reads the FRAG_ATTRIB_WPOS input.
* All other code pieces that reference that input will be rewritten
* to read from a newly allocated temporary.
*
* \todo if/when r5xx supports the radeon_program architecture, this is a
* likely candidate for code sharing.
*/
static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler)
{
GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
if (!(InputsRead & FRAG_BIT_WPOS))
return;
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
};
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
_mesa_insert_instructions(compiler->program, 0, 3);
fpi = compiler->program->Instructions;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_W;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
i++;
fpi[i].Opcode = OPCODE_MUL;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[1].Index = tempregi;
fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
i++;
/* viewport transformation */
window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[0].Index = tempregi;
fpi[i].SrcReg[0].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
fpi[i].SrcReg[1].Index = window_index;
fpi[i].SrcReg[1].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
fpi[i].SrcReg[2].Index = window_index;
fpi[i].SrcReg[2].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
i++;
for (; i < compiler->program->NumInstructions; ++i) {
int reg;
for (reg = 0; reg < 3; reg++) {
if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[reg].Index = tempregi;
}
}
}
}
static void nqssadce_init(struct nqssadce_state* s)
{
s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
}
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
default:
case GL_LUMINANCE: return 0;
case GL_INTENSITY: return 1;
case GL_ALPHA: return 2;
}
}
static GLuint build_func(GLuint comparefunc)
{
return comparefunc - GL_NEVER;
}
/**
* Collect all external state that is relevant for compiling the given
* fragment program.
*/
static void build_state(
r300ContextPtr r300,
struct r300_fragment_program *fp,
struct r300_fragment_program_external_state *state)
{
int unit;
_mesa_bzero(state, sizeof(*state));
for(unit = 0; unit < 16; ++unit) {
if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
}
}
}
void r300TranslateFragmentShader(r300ContextPtr r300,
struct r300_fragment_program *fp)
{
struct r300_fragment_program_external_state state;
build_state(r300, fp, &state);
if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
/* TODO: cache compiled programs */
fp->translated = GL_FALSE;
_mesa_memcpy(&fp->state, &state, sizeof(state));
}
if (!fp->translated) {
struct r300_fragment_program_compiler compiler;
compiler.r300 = r300;
compiler.fp = fp;
compiler.code = &fp->code;
compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Fragment Program: Initial program:\n");
_mesa_print_program(compiler.program);
}
insert_WPOS_trailer(&compiler);
struct radeon_program_transformation transformations[] = {
{ &transform_TEX, &compiler },
{ &radeonTransformALU, 0 },
{ &radeonTransformTrigSimple, 0 }
};
radeonLocalTransform(
r300->radeon.glCtx,
compiler.program,
3, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Fragment Program: After native rewrite:\n");
_mesa_print_program(compiler.program);
}
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &r300FPIsNativeSwizzle,
.BuildSwizzle = &r300FPBuildSwizzle,
.RewriteDepthOut = GL_TRUE
};
radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program);
}
if (!r300FragmentProgramEmit(&compiler))
fp->error = GL_TRUE;
/* Subtle: Rescue any parameters that have been added during transformations */
_mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
fp->mesa_program.Base.Parameters = compiler.program->Parameters;
compiler.program->Parameters = 0;
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, NULL);
if (!fp->error)
fp->translated = GL_TRUE;
if (fp->error || (RADEON_DEBUG & DEBUG_PIXEL))
r300FragmentProgramDump(fp, &fp->code);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
}
update_params(r300, fp);
}
/* just some random things... */
void r300FragmentProgramDump(
struct r300_fragment_program *fp,
struct r300_fragment_program_code *code)
{
int n, i, j;
static int pc = 0;
fprintf(stderr, "pc=%d*************************************\n", pc++);
fprintf(stderr, "Hardware program\n");
fprintf(stderr, "----------------\n");
for (n = 0; n < (code->cur_node + 1); n++) {
fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
"alu_end: %d, tex_end: %d, flags: %08x\n", n,
code->node[n].alu_offset,
code->node[n].tex_offset,
code->node[n].alu_end, code->node[n].tex_end,
code->node[n].flags);
if (n > 0 || code->first_node_has_tex) {
fprintf(stderr, " TEX:\n");
for (i = code->node[n].tex_offset;
i <= code->node[n].tex_offset + code->node[n].tex_end;
++i) {
const char *instr;
switch ((code->tex.
inst[i] >> R300_TEX_INST_SHIFT) &
15) {
case R300_TEX_OP_LD:
instr = "TEX";
break;
case R300_TEX_OP_KIL:
instr = "KIL";
break;
case R300_TEX_OP_TXP:
instr = "TXP";
break;
case R300_TEX_OP_TXB:
instr = "TXB";
break;
default:
instr = "UNKNOWN";
}
fprintf(stderr,
" %s t%i, %c%i, texture[%i] (%08x)\n",
instr,
(code->tex.
inst[i] >> R300_DST_ADDR_SHIFT) & 31,
't',
(code->tex.
inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
(code->tex.
inst[i] & R300_TEX_ID_MASK) >>
R300_TEX_ID_SHIFT,
code->tex.inst[i]);
}
}
for (i = code->node[n].alu_offset;
i <= code->node[n].alu_offset + code->node[n].alu_end; ++i) {
char srcc[3][10], dstc[20];
char srca[3][10], dsta[20];
char argc[3][20];
char arga[3][20];
char flags[5], tmp[10];
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].inst1 >> (j * 6);
int rega = code->alu.inst[i].inst3 >> (j * 6);
sprintf(srcc[j], "%c%i",
(regc & 32) ? 'c' : 't', regc & 31);
sprintf(srca[j], "%c%i",
(rega & 32) ? 'c' : 't', rega & 31);
}
dstc[0] = 0;
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_REG_X) ? "x" : "",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_REG_Y) ? "y" : "",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_REG_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(dstc, "t%i.%s ",
(code->alu.inst[i].
inst1 >> R300_ALU_DSTC_SHIFT) & 31,
flags);
}
sprintf(flags, "%s%s%s",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
(code->alu.inst[i].
inst1 & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
if (flags[0] != 0) {
sprintf(tmp, "o%i.%s",
(code->alu.inst[i].
inst1 >> R300_ALU_DSTC_SHIFT) & 31,
flags);
strcat(dstc, tmp);
}
dsta[0] = 0;
if (code->alu.inst[i].inst3 & R300_ALU_DSTA_REG) {
sprintf(dsta, "t%i.w ",
(code->alu.inst[i].
inst3 >> R300_ALU_DSTA_SHIFT) & 31);
}
if (code->alu.inst[i].inst3 & R300_ALU_DSTA_OUTPUT) {
sprintf(tmp, "o%i.w ",
(code->alu.inst[i].
inst3 >> R300_ALU_DSTA_SHIFT) & 31);
strcat(dsta, tmp);
}
if (code->alu.inst[i].inst3 & R300_ALU_DSTA_DEPTH) {
strcat(dsta, "Z");
}
fprintf(stderr,
"%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
" w: %3s %3s %3s -> %-20s (%08x)\n", i,
srcc[0], srcc[1], srcc[2], dstc,
code->alu.inst[i].inst1, srca[0], srca[1],
srca[2], dsta, code->alu.inst[i].inst3);
for (j = 0; j < 3; ++j) {
int regc = code->alu.inst[i].inst0 >> (j * 7);
int rega = code->alu.inst[i].inst2 >> (j * 7);
int d;
char buf[20];
d = regc & 31;
if (d < 12) {
switch (d % 4) {
case R300_ALU_ARGC_SRC0C_XYZ:
sprintf(buf, "%s.xyz",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_XXX:
sprintf(buf, "%s.xxx",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_YYY:
sprintf(buf, "%s.yyy",
srcc[d / 4]);
break;
case R300_ALU_ARGC_SRC0C_ZZZ:
sprintf(buf, "%s.zzz",
srcc[d / 4]);
break;
}
} else if (d < 15) {
sprintf(buf, "%s.www", srca[d - 12]);
} else if (d == 20) {
sprintf(buf, "0.0");
} else if (d == 21) {
sprintf(buf, "1.0");
} else if (d == 22) {
sprintf(buf, "0.5");
} else if (d >= 23 && d < 32) {
d -= 23;
switch (d / 3) {
case 0:
sprintf(buf, "%s.yzx",
srcc[d % 3]);
break;
case 1:
sprintf(buf, "%s.zxy",
srcc[d % 3]);
break;
case 2:
sprintf(buf, "%s.Wzy",
srcc[d % 3]);
break;
}
} else {
sprintf(buf, "%i", d);
}
sprintf(argc[j], "%s%s%s%s",
(regc & 32) ? "-" : "",
(regc & 64) ? "|" : "",
buf, (regc & 64) ? "|" : "");
d = rega & 31;
if (d < 9) {
sprintf(buf, "%s.%c", srcc[d / 3],
'x' + (char)(d % 3));
} else if (d < 12) {
sprintf(buf, "%s.w", srca[d - 9]);
} else if (d == 16) {
sprintf(buf, "0.0");
} else if (d == 17) {
sprintf(buf, "1.0");
} else if (d == 18) {
sprintf(buf, "0.5");
} else {
sprintf(buf, "%i", d);
}
sprintf(arga[j], "%s%s%s%s",
(rega & 32) ? "-" : "",
(rega & 64) ? "|" : "",
buf, (rega & 64) ? "|" : "");
}
fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
" w: %8s %8s %8s op: %08x\n",
argc[0], argc[1], argc[2],
code->alu.inst[i].inst0, arga[0], arga[1],
arga[2], code->alu.inst[i].inst2);
}
}
}

View File

@ -0,0 +1,132 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R600_FRAGPROG_H_
#define __R600_FRAGPROG_H_
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "shader/prog_instruction.h"
#include "r600_context.h"
#include "radeon_program.h"
#define DRI_CONF_FP_OPTIMIZATION_SPEED 0
#define DRI_CONF_FP_OPTIMIZATION_QUALITY 1
#if 1
/**
* Fragment program helper macros
*/
/* Produce unshifted source selectors */
#define FP_TMP(idx) (idx)
#define FP_CONST(idx) ((idx) | (1 << 5))
/* Produce source/dest selector dword */
#define FP_SELC_MASK_NO 0
#define FP_SELC_MASK_X 1
#define FP_SELC_MASK_Y 2
#define FP_SELC_MASK_XY 3
#define FP_SELC_MASK_Z 4
#define FP_SELC_MASK_XZ 5
#define FP_SELC_MASK_YZ 6
#define FP_SELC_MASK_XYZ 7
#define FP_SELC(destidx,regmask,outmask,src0,src1,src2) \
(((destidx) << R300_ALU_DSTC_SHIFT) | \
(FP_SELC_MASK_##regmask << 23) | \
(FP_SELC_MASK_##outmask << 26) | \
((src0) << R300_ALU_SRC0C_SHIFT) | \
((src1) << R300_ALU_SRC1C_SHIFT) | \
((src2) << R300_ALU_SRC2C_SHIFT))
#define FP_SELA_MASK_NO 0
#define FP_SELA_MASK_W 1
#define FP_SELA(destidx,regmask,outmask,src0,src1,src2) \
(((destidx) << R300_ALU_DSTA_SHIFT) | \
(FP_SELA_MASK_##regmask << 23) | \
(FP_SELA_MASK_##outmask << 24) | \
((src0) << R300_ALU_SRC0A_SHIFT) | \
((src1) << R300_ALU_SRC1A_SHIFT) | \
((src2) << R300_ALU_SRC2A_SHIFT))
/* Produce unshifted argument selectors */
#define FP_ARGC(source) R300_ALU_ARGC_##source
#define FP_ARGA(source) R300_ALU_ARGA_##source
#define FP_ABS(arg) ((arg) | (1 << 6))
#define FP_NEG(arg) ((arg) ^ (1 << 5))
/* Produce instruction dword */
#define FP_INSTRC(opcode,arg0,arg1,arg2) \
(R300_ALU_OUTC_##opcode | \
((arg0) << R300_ALU_ARG0C_SHIFT) | \
((arg1) << R300_ALU_ARG1C_SHIFT) | \
((arg2) << R300_ALU_ARG2C_SHIFT))
#define FP_INSTRA(opcode,arg0,arg1,arg2) \
(R300_ALU_OUTA_##opcode | \
((arg0) << R300_ALU_ARG0A_SHIFT) | \
((arg1) << R300_ALU_ARG1A_SHIFT) | \
((arg2) << R300_ALU_ARG2A_SHIFT))
#endif
struct r300_fragment_program;
extern void r300TranslateFragmentShader(r300ContextPtr r300,
struct r300_fragment_program *fp);
/**
* Used internally by the r300 fragment program code to store compile-time
* only data.
*/
struct r300_fragment_program_compiler {
r300ContextPtr r300;
struct r300_fragment_program *fp;
struct r300_fragment_program_code *code;
struct gl_program *program;
};
extern GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler);
extern void r300FragmentProgramDump(
struct r300_fragment_program *fp,
struct r300_fragment_program_code *code);
#endif

View File

@ -0,0 +1,344 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*
* Emit the r300_fragment_program_code that can be understood by the hardware.
* Input is a pre-transformed radeon_program.
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*
* \todo FogOption
*/
#include "r600_fragprog.h"
#include "radeon_program_pair.h"
#include "r600_fragprog_swizzle.h"
#include "r600_reg.h"
#define PROG_CODE \
struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)data; \
struct r300_fragment_program_code *code = c->code
#define error(fmt, args...) do { \
fprintf(stderr, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
static GLboolean emit_const(void* data, GLuint file, GLuint index, GLuint *hwindex)
{
PROG_CODE;
for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
if (code->constant[*hwindex].File == file &&
code->constant[*hwindex].Index == index)
break;
}
if (*hwindex >= code->const_nr) {
if (*hwindex >= PFS_NUM_CONST_REGS) {
error("Out of hw constants!\n");
return GL_FALSE;
}
code->const_nr++;
code->constant[*hwindex].File = file;
code->constant[*hwindex].Index = index;
}
return GL_TRUE;
}
/**
* Mark a temporary register as used.
*/
static void use_temporary(struct r300_fragment_program_code *code, GLuint index)
{
if (index > code->max_temp_idx)
code->max_temp_idx = index;
}
static GLuint translate_rgb_opcode(GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTC_CMP;
case OPCODE_DP3: return R300_ALU_OUTC_DP3;
case OPCODE_DP4: return R300_ALU_OUTC_DP4;
case OPCODE_FRC: return R300_ALU_OUTC_FRC;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
case OPCODE_NOP:
/* fall through */
case OPCODE_MAD: return R300_ALU_OUTC_MAD;
case OPCODE_MAX: return R300_ALU_OUTC_MAX;
case OPCODE_MIN: return R300_ALU_OUTC_MIN;
case OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
}
}
static GLuint translate_alpha_opcode(GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R300_ALU_OUTA_CMP;
case OPCODE_DP3: return R300_ALU_OUTA_DP4;
case OPCODE_DP4: return R300_ALU_OUTA_DP4;
case OPCODE_EX2: return R300_ALU_OUTA_EX2;
case OPCODE_FRC: return R300_ALU_OUTA_FRC;
case OPCODE_LG2: return R300_ALU_OUTA_LG2;
default:
error("translate_rgb_opcode(%i): Unknown opcode", opcode);
/* fall through */
case OPCODE_NOP:
/* fall through */
case OPCODE_MAD: return R300_ALU_OUTA_MAD;
case OPCODE_MAX: return R300_ALU_OUTA_MAX;
case OPCODE_MIN: return R300_ALU_OUTA_MIN;
case OPCODE_RCP: return R300_ALU_OUTA_RCP;
case OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
}
}
/**
* Emit one paired ALU instruction.
*/
static GLboolean emit_alu(void* data, struct radeon_pair_instruction* inst)
{
PROG_CODE;
if (code->alu.length >= PFS_MAX_ALU_INST) {
error("Too many ALU instructions");
return GL_FALSE;
}
int ip = code->alu.length++;
int j;
code->node[code->cur_node].alu_end++;
code->alu.inst[ip].inst0 = translate_rgb_opcode(inst->RGB.Opcode);
code->alu.inst[ip].inst2 = translate_alpha_opcode(inst->Alpha.Opcode);
for(j = 0; j < 3; ++j) {
GLuint src = inst->RGB.Src[j].Index | (inst->RGB.Src[j].Constant << 5);
if (!inst->RGB.Src[j].Constant)
use_temporary(code, inst->RGB.Src[j].Index);
code->alu.inst[ip].inst1 |= src << (6*j);
src = inst->Alpha.Src[j].Index | (inst->Alpha.Src[j].Constant << 5);
if (!inst->Alpha.Src[j].Constant)
use_temporary(code, inst->Alpha.Src[j].Index);
code->alu.inst[ip].inst3 |= src << (6*j);
GLuint arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
arg |= inst->RGB.Arg[j].Abs << 6;
arg |= inst->RGB.Arg[j].Negate << 5;
code->alu.inst[ip].inst0 |= arg << (7*j);
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
arg |= inst->Alpha.Arg[j].Abs << 6;
arg |= inst->Alpha.Arg[j].Negate << 5;
code->alu.inst[ip].inst2 |= arg << (7*j);
}
if (inst->RGB.Saturate)
code->alu.inst[ip].inst0 |= R300_ALU_OUTC_CLAMP;
if (inst->Alpha.Saturate)
code->alu.inst[ip].inst2 |= R300_ALU_OUTA_CLAMP;
if (inst->RGB.WriteMask) {
use_temporary(code, inst->RGB.DestIndex);
code->alu.inst[ip].inst1 |=
(inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
}
if (inst->RGB.OutputWriteMask) {
code->alu.inst[ip].inst1 |= (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT);
code->node[code->cur_node].flags |= R300_RGBA_OUT;
}
if (inst->Alpha.WriteMask) {
use_temporary(code, inst->Alpha.DestIndex);
code->alu.inst[ip].inst3 |=
(inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
R300_ALU_DSTA_REG;
}
if (inst->Alpha.OutputWriteMask) {
code->alu.inst[ip].inst3 |= R300_ALU_DSTA_OUTPUT;
code->node[code->cur_node].flags |= R300_RGBA_OUT;
}
if (inst->Alpha.DepthWriteMask) {
code->alu.inst[ip].inst3 |= R300_ALU_DSTA_DEPTH;
code->node[code->cur_node].flags |= R300_W_OUT;
c->fp->WritesDepth = GL_TRUE;
}
return GL_TRUE;
}
/**
* Finish the current node without advancing to the next one.
*/
static GLboolean finish_node(struct r300_fragment_program_compiler *c)
{
struct r300_fragment_program_code *code = c->code;
struct r300_fragment_program_node *node = &code->node[code->cur_node];
if (node->alu_end < 0) {
/* Generate a single NOP for this node */
struct radeon_pair_instruction inst;
_mesa_bzero(&inst, sizeof(inst));
if (!emit_alu(c, &inst))
return GL_FALSE;
}
if (node->tex_end < 0) {
if (code->cur_node == 0) {
node->tex_end = 0;
} else {
error("Node %i has no TEX instructions", code->cur_node);
return GL_FALSE;
}
} else {
if (code->cur_node == 0)
code->first_node_has_tex = 1;
}
return GL_TRUE;
}
/**
* Begin a block of texture instructions.
* Create the necessary indirection.
*/
static GLboolean begin_tex(void* data)
{
PROG_CODE;
if (code->cur_node == 0) {
if (code->node[0].alu_end < 0 &&
code->node[0].tex_end < 0)
return GL_TRUE;
}
if (code->cur_node == 3) {
error("Too many texture indirections");
return GL_FALSE;
}
if (!finish_node(c))
return GL_FALSE;
struct r300_fragment_program_node *node = &code->node[++code->cur_node];
node->alu_offset = code->alu.length;
node->alu_end = -1;
node->tex_offset = code->tex.length;
node->tex_end = -1;
return GL_TRUE;
}
static GLboolean emit_tex(void* data, struct prog_instruction* inst)
{
PROG_CODE;
if (code->tex.length >= PFS_MAX_TEX_INST) {
error("Too many TEX instructions");
return GL_FALSE;
}
GLuint unit = inst->TexSrcUnit;
GLuint dest = inst->DstReg.Index;
GLuint opcode;
switch(inst->Opcode) {
case OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
case OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
case OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
case OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
default:
error("Unknown texture opcode %i", inst->Opcode);
return GL_FALSE;
}
if (inst->Opcode == OPCODE_KIL) {
unit = 0;
dest = 0;
} else {
use_temporary(code, dest);
}
use_temporary(code, inst->SrcReg[0].Index);
code->node[code->cur_node].tex_end++;
code->tex.inst[code->tex.length++] =
(inst->SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
(dest << R300_DST_ADDR_SHIFT) |
(unit << R300_TEX_ID_SHIFT) |
(opcode << R300_TEX_INST_SHIFT);
return GL_TRUE;
}
static const struct radeon_pair_handler pair_handler = {
.EmitConst = &emit_const,
.EmitPaired = &emit_alu,
.EmitTex = &emit_tex,
.BeginTexBlock = &begin_tex,
.MaxHwTemps = PFS_NUM_TEMP_REGS
};
/**
* Final compilation step: Turn the intermediate radeon_program into
* machine-readable instructions.
*/
GLboolean r300FragmentProgramEmit(struct r300_fragment_program_compiler *compiler)
{
struct r300_fragment_program_code *code = compiler->code;
_mesa_bzero(code, sizeof(struct r300_fragment_program_code));
code->node[0].alu_end = -1;
code->node[0].tex_end = -1;
if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
return GL_FALSE;
if (!finish_node(compiler))
return GL_FALSE;
return GL_TRUE;
}

View File

@ -0,0 +1,227 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* @file
* Utilities to deal with the somewhat odd restriction on R300 fragment
* program swizzles.
*/
#include "r600_fragprog_swizzle.h"
#include "r600_reg.h"
#include "radeon_nqssadce.h"
#define MAKE_SWZ3(x, y, z) (MAKE_SWIZZLE4(SWIZZLE_##x, SWIZZLE_##y, SWIZZLE_##z, SWIZZLE_ZERO))
struct swizzle_data {
GLuint hash; /**< swizzle value this matches */
GLuint base; /**< base value for hw swizzle */
GLuint stride; /**< difference in base between arg0/1/2 */
};
static const struct swizzle_data native_swizzles[] = {
{MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
{MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
{MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
{MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
{MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
{MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
{MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
{MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
{MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
{MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0}
};
static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
/**
* Find a native RGB swizzle that matches the given swizzle.
* Returns 0 if none found.
*/
static const struct swizzle_data* lookup_native_swizzle(GLuint swizzle)
{
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data* sd = &native_swizzles[i];
for(comp = 0; comp < 3; ++comp) {
GLuint swz = GET_SWZ(swizzle, comp);
if (swz == SWIZZLE_NIL)
continue;
if (swz != GET_SWZ(sd->hash, comp))
break;
}
if (comp == 3)
return sd;
}
return 0;
}
/**
* Check whether the given instruction supports the swizzle and negate
* combinations in the given source register.
*/
GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg)
{
if (reg.Abs)
reg.NegateBase = 0;
if (opcode == OPCODE_KIL ||
opcode == OPCODE_TEX ||
opcode == OPCODE_TXB ||
opcode == OPCODE_TXP) {
int j;
if (reg.Abs || reg.NegateBase != (15*reg.NegateAbs))
return GL_FALSE;
for(j = 0; j < 4; ++j) {
GLuint swz = GET_SWZ(reg.Swizzle, j);
if (swz == SWIZZLE_NIL)
continue;
if (swz != j)
return GL_FALSE;
}
return GL_TRUE;
}
GLuint relevant = 0;
int j;
for(j = 0; j < 3; ++j)
if (GET_SWZ(reg.Swizzle, j) != SWIZZLE_NIL)
relevant |= 1 << j;
if ((reg.NegateBase & relevant) && (reg.NegateBase & relevant) != relevant)
return GL_FALSE;
if (!lookup_native_swizzle(reg.Swizzle))
return GL_FALSE;
return GL_TRUE;
}
/**
* Generate MOV dst, src using only native swizzles.
*/
void r300FPBuildSwizzle(struct nqssadce_state *s, struct prog_dst_register dst, struct prog_src_register src)
{
if (src.Abs)
src.NegateBase = 0;
while(dst.WriteMask) {
const struct swizzle_data *best_swizzle = 0;
GLuint best_matchcount = 0;
GLuint best_matchmask = 0;
GLboolean rgbnegate;
int i, comp;
for(i = 0; i < num_native_swizzles; ++i) {
const struct swizzle_data *sd = &native_swizzles[i];
GLuint matchcount = 0;
GLuint matchmask = 0;
for(comp = 0; comp < 3; ++comp) {
if (!GET_BIT(dst.WriteMask, comp))
continue;
GLuint swz = GET_SWZ(src.Swizzle, comp);
if (swz == SWIZZLE_NIL)
continue;
if (swz == GET_SWZ(sd->hash, comp)) {
matchcount++;
matchmask |= 1 << comp;
}
}
if (matchcount > best_matchcount) {
best_swizzle = sd;
best_matchcount = matchcount;
best_matchmask = matchmask;
if (matchmask == (dst.WriteMask & WRITEMASK_XYZ))
break;
}
}
if ((src.NegateBase & best_matchmask) != 0) {
best_matchmask &= src.NegateBase;
rgbnegate = !src.NegateAbs;
} else {
rgbnegate = src.NegateAbs;
}
struct prog_instruction *inst;
_mesa_insert_instructions(s->Program, s->IP, 1);
inst = s->Program->Instructions + s->IP++;
inst->Opcode = OPCODE_MOV;
inst->DstReg = dst;
inst->DstReg.WriteMask &= (best_matchmask | WRITEMASK_W);
inst->SrcReg[0] = src;
/* Note: We rely on NqSSA/DCE to set unused swizzle components to NIL */
dst.WriteMask &= ~inst->DstReg.WriteMask;
}
}
/**
* Translate an RGB (XYZ) swizzle into the hardware code for the given
* instruction source.
*/
GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle)
{
const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
if (!sd) {
_mesa_printf("Not a native swizzle: %08x\n", swizzle);
return 0;
}
return sd->base + src*sd->stride;
}
/**
* Translate an Alpha (W) swizzle into the hardware code for the given
* instruction source.
*/
GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle)
{
if (swizzle < 3)
return swizzle + 3*src;
switch(swizzle) {
case SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
case SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
case SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
default: return R300_ALU_ARGA_ONE;
}
}

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __R600_FRAGPROG_SWIZZLE_H_
#define __R600_FRAGPROG_SWIZZLE_H_
#include "main/glheader.h"
#include "shader/prog_instruction.h"
struct nqssadce_state;
GLboolean r300FPIsNativeSwizzle(GLuint opcode, struct prog_src_register reg);
void r300FPBuildSwizzle(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
GLuint r300FPTranslateRGBSwizzle(GLuint src, GLuint swizzle);
GLuint r300FPTranslateAlphaSwizzle(GLuint src, GLuint swizzle);
#endif /* __R300_FRAGPROG_SWIZZLE_H_ */

View File

@ -0,0 +1,667 @@
/*
Copyright (C) The Weather Channel, Inc. 2002.
Copyright (C) 2004 Nicolai Haehnle.
All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
*
* \author Nicolai Haehnle <prefect_@gmx.net>
*/
#include <sched.h>
#include <errno.h>
#include "main/glheader.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/context.h"
#include "swrast/swrast.h"
#include "radeon_common.h"
#include "radeon_lock.h"
#include "r600_context.h"
#include "r600_ioctl.h"
#include "r600_cmdbuf.h"
#include "r600_state.h"
#include "r600_vertprog.h"
#include "radeon_reg.h"
#include "r600_emit.h"
#include "r600_fragprog.h"
#include "r600_context.h"
#include "vblank.h"
#define R200_3D_DRAW_IMMD_2 0xC0003500
#define CLEARBUFFER_COLOR 0x1
#define CLEARBUFFER_DEPTH 0x2
#define CLEARBUFFER_STENCIL 0x4
static void r300EmitClearState(GLcontext * ctx);
static void r300UserClear(GLcontext *ctx, GLuint mask)
{
radeon_clear_tris(ctx, mask);
}
static void r300ClearBuffer(r300ContextPtr r300, int flags,
struct radeon_renderbuffer *rrb,
struct radeon_renderbuffer *rrbd)
{
BATCH_LOCALS(&r300->radeon);
GLcontext *ctx = r300->radeon.glCtx;
__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
GLuint cbpitch = 0;
r300ContextPtr rmesa = r300;
if (RADEON_DEBUG & DEBUG_IOCTL)
fprintf(stderr, "%s: buffer %p (%i,%i %ix%i)\n",
__FUNCTION__, rrb, dPriv->x, dPriv->y,
dPriv->w, dPriv->h);
if (rrb) {
cbpitch = (rrb->pitch / rrb->cpp);
if (rrb->cpp == 4)
cbpitch |= R300_COLOR_FORMAT_ARGB8888;
else
cbpitch |= R300_COLOR_FORMAT_RGB565;
if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
cbpitch |= R300_COLOR_TILE_ENABLE;
}
}
/* TODO in bufmgr */
cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
end_3d(&rmesa->radeon);
if (flags & CLEARBUFFER_COLOR) {
assert(rrb != 0);
BEGIN_BATCH_NO_AUTOSTATE(6);
OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGVAL(R300_RB3D_COLORPITCH0, cbpitch);
END_BATCH();
}
#if 1
if (flags & (CLEARBUFFER_DEPTH | CLEARBUFFER_STENCIL)) {
assert(rrbd != 0);
cbpitch = (rrbd->pitch / rrbd->cpp);
if (rrbd->bo->flags & RADEON_BO_FLAGS_MACRO_TILE){
cbpitch |= R300_DEPTHMACROTILE_ENABLE;
}
if (rrbd->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
cbpitch |= R300_DEPTHMICROTILE_TILED;
}
BEGIN_BATCH_NO_AUTOSTATE(6);
OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
OUT_BATCH_RELOC(0, rrbd->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
OUT_BATCH_REGVAL(R300_ZB_DEPTHPITCH, cbpitch);
END_BATCH();
}
#endif
BEGIN_BATCH_NO_AUTOSTATE(6);
OUT_BATCH_REGSEQ(RB3D_COLOR_CHANNEL_MASK, 1);
if (flags & CLEARBUFFER_COLOR) {
OUT_BATCH((ctx->Color.ColorMask[BCOMP] ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
(ctx->Color.ColorMask[GCOMP] ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
(ctx->Color.ColorMask[RCOMP] ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
(ctx->Color.ColorMask[ACOMP] ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0));
} else {
OUT_BATCH(0);
}
{
uint32_t t1, t2;
t1 = 0x0;
t2 = 0x0;
if (flags & CLEARBUFFER_DEPTH) {
t1 |= R300_Z_ENABLE | R300_Z_WRITE_ENABLE;
t2 |=
(R300_ZS_ALWAYS << R300_Z_FUNC_SHIFT);
}
if (flags & CLEARBUFFER_STENCIL) {
t1 |= R300_STENCIL_ENABLE;
t2 |=
(R300_ZS_ALWAYS <<
R300_S_FRONT_FUNC_SHIFT) |
(R300_ZS_REPLACE <<
R300_S_FRONT_SFAIL_OP_SHIFT) |
(R300_ZS_REPLACE <<
R300_S_FRONT_ZPASS_OP_SHIFT) |
(R300_ZS_REPLACE <<
R300_S_FRONT_ZFAIL_OP_SHIFT);
}
OUT_BATCH_REGSEQ(R300_ZB_CNTL, 3);
OUT_BATCH(t1);
OUT_BATCH(t2);
OUT_BATCH(((ctx->Stencil.WriteMask[0] & R300_STENCILREF_MASK) <<
R300_STENCILWRITEMASK_SHIFT) |
(ctx->Stencil.Clear & R300_STENCILREF_MASK));
END_BATCH();
}
if (!rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH_NO_AUTOSTATE(9);
OUT_BATCH(cmdpacket3(r300->radeon.radeonScreen, R300_CMD_PACKET3_CLEAR));
OUT_BATCH_FLOAT32(dPriv->w / 2.0);
OUT_BATCH_FLOAT32(dPriv->h / 2.0);
OUT_BATCH_FLOAT32(ctx->Depth.Clear);
OUT_BATCH_FLOAT32(1.0);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
END_BATCH();
} else {
OUT_BATCH(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
OUT_BATCH(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
(1 << R300_PRIM_NUM_VERTICES_SHIFT));
OUT_BATCH_FLOAT32(dPriv->w / 2.0);
OUT_BATCH_FLOAT32(dPriv->h / 2.0);
OUT_BATCH_FLOAT32(ctx->Depth.Clear);
OUT_BATCH_FLOAT32(1.0);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[0]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[1]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[2]);
OUT_BATCH_FLOAT32(ctx->Color.ClearColor[3]);
}
r300EmitCacheFlush(rmesa);
cp_wait(&r300->radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
R300_STATECHANGE(r300, cb);
R300_STATECHANGE(r300, cmk);
R300_STATECHANGE(r300, zs);
}
static void r300EmitClearState(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
BATCH_LOCALS(&r300->radeon);
__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
int i;
int has_tcl = 1;
int is_r500 = 0;
GLuint vap_cntl;
if (!(r300->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
has_tcl = 0;
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
is_r500 = 1;
/* State atom dirty tracking is a little subtle here.
*
* On the one hand, we need to make sure base state is emitted
* here if we start with an empty batch buffer, otherwise clear
* works incorrectly with multiple processes. Therefore, the first
* BEGIN_BATCH cannot be a BEGIN_BATCH_NO_AUTOSTATE.
*
* On the other hand, implicit state emission clears the state atom
* dirty bits, so we have to call R300_STATECHANGE later than the
* first BEGIN_BATCH.
*
* The final trickiness is that, because we change state, we need
* to ensure that any stored swtcl primitives are flushed properly
* before we start changing state. See the R300_NEWPRIM in r300Clear
* for this.
*/
BEGIN_BATCH(31);
OUT_BATCH_REGSEQ(R300_VAP_PROG_STREAM_CNTL_0, 1);
if (!has_tcl)
OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
else
OUT_BATCH(((((0 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_0_SHIFT) |
((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT)));
OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
(R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT))
<< R300_SWIZZLE0_SHIFT) |
(((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
(R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
(R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) |
(R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) |
((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W) << R300_WRITE_ENA_SHIFT))
<< R300_SWIZZLE1_SHIFT)));
/* R300_VAP_INPUT_CNTL_0, R300_VAP_INPUT_CNTL_1 */
OUT_BATCH_REGSEQ(R300_VAP_VTX_STATE_CNTL, 2);
OUT_BATCH((R300_SEL_USER_COLOR_0 << R300_COLOR_0_ASSEMBLY_SHIFT));
OUT_BATCH(R300_INPUT_CNTL_POS | R300_INPUT_CNTL_COLOR | R300_INPUT_CNTL_TC0);
/* comes from fglrx startup of clear */
OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
OUT_BATCH(R300_VTX_W0_FMT | R300_VPORT_X_SCALE_ENA |
R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
R300_VPORT_Z_OFFSET_ENA);
OUT_BATCH(0x8);
OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT |
R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT);
OUT_BATCH(0); /* no textures */
OUT_BATCH_REGVAL(R300_TX_ENABLE, 0);
OUT_BATCH_REGSEQ(R300_SE_VPORT_XSCALE, 6);
OUT_BATCH_FLOAT32(1.0);
OUT_BATCH_FLOAT32(dPriv->x);
OUT_BATCH_FLOAT32(1.0);
OUT_BATCH_FLOAT32(dPriv->y);
OUT_BATCH_FLOAT32(1.0);
OUT_BATCH_FLOAT32(0.0);
OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
OUT_BATCH(0x0);
OUT_BATCH(0x0);
END_BATCH();
R300_STATECHANGE(r300, vir[0]);
R300_STATECHANGE(r300, fogs);
R300_STATECHANGE(r300, vir[1]);
R300_STATECHANGE(r300, vic);
R300_STATECHANGE(r300, vte);
R300_STATECHANGE(r300, vof);
R300_STATECHANGE(r300, txe);
R300_STATECHANGE(r300, vpt);
R300_STATECHANGE(r300, at);
R300_STATECHANGE(r300, bld);
R300_STATECHANGE(r300, ps);
if (has_tcl) {
R300_STATECHANGE(r300, vap_clip_cntl);
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_PS_UCP_MODE_CLIP_AS_TRIFAN | R300_CLIP_DISABLE);
END_BATCH();
}
BEGIN_BATCH_NO_AUTOSTATE(2);
OUT_BATCH_REGVAL(R300_GA_POINT_SIZE,
((dPriv->w * 6) << R300_POINTSIZE_X_SHIFT) |
((dPriv->h * 6) << R300_POINTSIZE_Y_SHIFT));
END_BATCH();
if (!is_r500) {
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
BEGIN_BATCH(14);
OUT_BATCH_REGSEQ(R300_RS_IP_0, 8);
for (i = 0; i < 8; ++i)
OUT_BATCH(R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3));
OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
OUT_BATCH(0x0);
OUT_BATCH_REGVAL(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
END_BATCH();
} else {
R300_STATECHANGE(r300, ri);
R300_STATECHANGE(r300, rc);
R300_STATECHANGE(r300, rr);
BEGIN_BATCH(14);
OUT_BATCH_REGSEQ(R500_RS_IP_0, 8);
for (i = 0; i < 8; ++i) {
OUT_BATCH((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
(R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
(R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
}
OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
OUT_BATCH((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
OUT_BATCH(0x0);
OUT_BATCH_REGVAL(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
END_BATCH();
}
if (!is_r500) {
R300_STATECHANGE(r300, fp);
R300_STATECHANGE(r300, fpi[0]);
R300_STATECHANGE(r300, fpi[1]);
R300_STATECHANGE(r300, fpi[2]);
R300_STATECHANGE(r300, fpi[3]);
BEGIN_BATCH(17);
OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
OUT_BATCH(0x0);
OUT_BATCH(0x0);
OUT_BATCH(0x0);
OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
OUT_BATCH(0x0);
OUT_BATCH(0x0);
OUT_BATCH(0x0);
OUT_BATCH(R300_RGBA_OUT);
OUT_BATCH_REGVAL(R300_US_ALU_RGB_INST_0,
FP_INSTRC(MAD, FP_ARGC(SRC0C_XYZ), FP_ARGC(ONE), FP_ARGC(ZERO)));
OUT_BATCH_REGVAL(R300_US_ALU_RGB_ADDR_0,
FP_SELC(0, NO, XYZ, FP_TMP(0), 0, 0));
OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_INST_0,
FP_INSTRA(MAD, FP_ARGA(SRC0A), FP_ARGA(ONE), FP_ARGA(ZERO)));
OUT_BATCH_REGVAL(R300_US_ALU_ALPHA_ADDR_0,
FP_SELA(0, NO, W, FP_TMP(0), 0, 0));
END_BATCH();
} else {
struct radeon_state_atom r500fp;
uint32_t _cmd[10];
R300_STATECHANGE(r300, fp);
R300_STATECHANGE(r300, r500fp);
BEGIN_BATCH(7);
OUT_BATCH_REGSEQ(R500_US_CONFIG, 2);
OUT_BATCH(R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
OUT_BATCH(0x0);
OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(1));
OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(1));
OUT_BATCH(R500_US_CODE_OFFSET_ADDR(0));
END_BATCH();
r500fp.check = check_r500fp;
r500fp.cmd = _cmd;
r500fp.cmd[0] = cmdr500fp(r300->radeon.radeonScreen, 0, 1, 0, 0);
r500fp.cmd[1] = R500_INST_TYPE_OUT |
R500_INST_TEX_SEM_WAIT |
R500_INST_LAST |
R500_INST_RGB_OMASK_R |
R500_INST_RGB_OMASK_G |
R500_INST_RGB_OMASK_B |
R500_INST_ALPHA_OMASK |
R500_INST_RGB_CLAMP |
R500_INST_ALPHA_CLAMP;
r500fp.cmd[2] = R500_RGB_ADDR0(0) |
R500_RGB_ADDR1(0) |
R500_RGB_ADDR1_CONST |
R500_RGB_ADDR2(0) |
R500_RGB_ADDR2_CONST;
r500fp.cmd[3] = R500_ALPHA_ADDR0(0) |
R500_ALPHA_ADDR1(0) |
R500_ALPHA_ADDR1_CONST |
R500_ALPHA_ADDR2(0) |
R500_ALPHA_ADDR2_CONST;
r500fp.cmd[4] = R500_ALU_RGB_SEL_A_SRC0 |
R500_ALU_RGB_R_SWIZ_A_R |
R500_ALU_RGB_G_SWIZ_A_G |
R500_ALU_RGB_B_SWIZ_A_B |
R500_ALU_RGB_SEL_B_SRC0 |
R500_ALU_RGB_R_SWIZ_B_R |
R500_ALU_RGB_B_SWIZ_B_G |
R500_ALU_RGB_G_SWIZ_B_B;
r500fp.cmd[5] = R500_ALPHA_OP_CMP |
R500_ALPHA_SWIZ_A_A |
R500_ALPHA_SWIZ_B_A;
r500fp.cmd[6] = R500_ALU_RGBA_OP_CMP |
R500_ALU_RGBA_R_SWIZ_0 |
R500_ALU_RGBA_G_SWIZ_0 |
R500_ALU_RGBA_B_SWIZ_0 |
R500_ALU_RGBA_A_SWIZ_0;
r500fp.cmd[7] = 0;
emit_r500fp(ctx, &r500fp);
}
BEGIN_BATCH(2);
OUT_BATCH_REGVAL(R300_VAP_PVS_STATE_FLUSH_REG, 0);
END_BATCH();
if (has_tcl) {
vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
(12 << R300_VF_MAX_VTX_NUM_SHIFT));
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
vap_cntl |= R500_TCL_STATE_OPTIMIZATION;
} else {
vap_cntl = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
(5 << R300_PVS_NUM_CNTLRS_SHIFT) |
(5 << R300_VF_MAX_VTX_NUM_SHIFT));
}
if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV515)
vap_cntl |= (2 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
vap_cntl |= (5 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
vap_cntl |= (6 << R300_PVS_NUM_FPUS_SHIFT);
else if ((r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
(r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580))
vap_cntl |= (8 << R300_PVS_NUM_FPUS_SHIFT);
else
vap_cntl |= (4 << R300_PVS_NUM_FPUS_SHIFT);
R300_STATECHANGE(r300, vap_cntl);
BEGIN_BATCH(2);
OUT_BATCH_REGVAL(R300_VAP_CNTL, vap_cntl);
END_BATCH();
if (has_tcl) {
struct radeon_state_atom vpu;
uint32_t _cmd[10];
R300_STATECHANGE(r300, pvs);
R300_STATECHANGE(r300, vpi);
BEGIN_BATCH(4);
OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
(0 << R300_PVS_XYZW_VALID_INST_SHIFT) |
(1 << R300_PVS_LAST_INST_SHIFT));
OUT_BATCH((0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) |
(0 << R300_PVS_MAX_CONST_ADDR_SHIFT));
OUT_BATCH(1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
END_BATCH();
vpu.check = check_vpu;
vpu.cmd = _cmd;
vpu.cmd[0] = cmdvpu(r300->radeon.radeonScreen, 0, 2);
vpu.cmd[1] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE,
0, 0xf, PVS_DST_REG_OUT);
vpu.cmd[2] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_X, PVS_SRC_SELECT_Y,
PVS_SRC_SELECT_Z, PVS_SRC_SELECT_W,
PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
vpu.cmd[3] = PVS_SRC_OPERAND(0, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
vpu.cmd[4] = 0x0;
vpu.cmd[5] = PVS_OP_DST_OPERAND(VE_ADD, GL_FALSE, GL_FALSE, 1, 0xf,
PVS_DST_REG_OUT);
vpu.cmd[6] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_X,
PVS_SRC_SELECT_Y, PVS_SRC_SELECT_Z,
PVS_SRC_SELECT_W, PVS_SRC_REG_INPUT,
VSF_FLAG_NONE);
vpu.cmd[7] = PVS_SRC_OPERAND(1, PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_SELECT_FORCE_0,
PVS_SRC_REG_INPUT, VSF_FLAG_NONE);
vpu.cmd[8] = 0x0;
r300->vap_flush_needed = GL_TRUE;
emit_vpu(ctx, &vpu);
}
}
static void r300KernelClear(GLcontext *ctx, GLuint flags)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
struct radeon_framebuffer *rfb = dPriv->driverPrivate;
struct radeon_renderbuffer *rrb;
struct radeon_renderbuffer *rrbd;
int bits = 0;
/* Make sure it fits there. */
rcommonEnsureCmdBufSpace(&r300->radeon, 421 * 3, __FUNCTION__);
if (flags || bits)
r300EmitClearState(ctx);
rrbd = radeon_get_renderbuffer(&rfb->base, BUFFER_DEPTH);
if (rrbd && (flags & BUFFER_BIT_DEPTH))
bits |= CLEARBUFFER_DEPTH;
if (rrbd && (flags & BUFFER_BIT_STENCIL))
bits |= CLEARBUFFER_STENCIL;
if (flags & BUFFER_BIT_COLOR0) {
rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_COLOR0);
r300ClearBuffer(r300, CLEARBUFFER_COLOR, rrb, NULL);
bits = 0;
}
if (flags & BUFFER_BIT_FRONT_LEFT) {
rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_FRONT_LEFT);
r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
bits = 0;
}
if (flags & BUFFER_BIT_BACK_LEFT) {
rrb = radeon_get_renderbuffer(&rfb->base, BUFFER_BACK_LEFT);
r300ClearBuffer(r300, bits | CLEARBUFFER_COLOR, rrb, rrbd);
bits = 0;
}
if (bits)
r300ClearBuffer(r300, bits, NULL, rrbd);
COMMIT_BATCH();
}
/**
* Buffer clear
*/
static void r300Clear(GLcontext * ctx, GLbitfield mask)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
__DRIdrawablePrivate *dPriv = r300->radeon.dri.drawable;
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask);
GLbitfield swrast_mask = 0, tri_mask = 0;
int i;
struct gl_framebuffer *fb = ctx->DrawBuffer;
if (RADEON_DEBUG & DEBUG_IOCTL)
fprintf(stderr, "r300Clear\n");
if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
LOCK_HARDWARE(&r300->radeon);
UNLOCK_HARDWARE(&r300->radeon);
if (dPriv->numClipRects == 0)
return;
}
/* Flush swtcl vertices if necessary, because we will change hardware
* state during clear. See also the state-related comment in
* r300EmitClearState.
*/
R300_NEWPRIM(r300);
if (colorMask == ~0)
tri_mask |= (mask & BUFFER_BITS_COLOR);
/* HW stencil */
if (mask & BUFFER_BIT_STENCIL) {
tri_mask |= BUFFER_BIT_STENCIL;
}
/* HW depth */
if (mask & BUFFER_BIT_DEPTH) {
tri_mask |= BUFFER_BIT_DEPTH;
}
/* If we're doing a tri pass for depth/stencil, include a likely color
* buffer with it.
*/
for (i = 0; i < BUFFER_COUNT; i++) {
GLuint bufBit = 1 << i;
if ((tri_mask) & bufBit) {
if (!fb->Attachment[i].Renderbuffer->ClassID) {
tri_mask &= ~bufBit;
swrast_mask |= bufBit;
}
}
}
/* SW fallback clearing */
swrast_mask = mask & ~tri_mask;
if (tri_mask) {
if (r300->radeon.radeonScreen->kernel_mm)
r300UserClear(ctx, tri_mask);
else
r300KernelClear(ctx, tri_mask);
}
if (swrast_mask) {
if (RADEON_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "%s: swrast clear, mask: %x\n",
__FUNCTION__, swrast_mask);
_swrast_Clear(ctx, swrast_mask);
}
}
void r300InitIoctlFuncs(struct dd_function_table *functions)
{
functions->Clear = r300Clear;
functions->Finish = radeonFinish;
functions->Flush = radeonFlush;
}

View File

@ -0,0 +1,44 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
* Nicolai Haehnle <prefect_@gmx.net>
*/
#ifndef __R600_IOCTL_H__
#define __R600_IOCTL_H__
#include "r600_context.h"
#include "radeon_drm.h"
extern void r300InitIoctlFuncs(struct dd_function_table *functions);
#endif /* __R600_IOCTL_H__ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,548 @@
/**************************************************************************
Copyright (C) 2004 Nicolai Haehnle.
All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \brief R300 Render (Vertex Buffer Implementation)
*
* The immediate implementation has been removed from CVS in favor of the vertex
* buffer implementation.
*
* The render functions are called by the pipeline manager to render a batch of
* primitives. They return TRUE to pass on to the next stage (i.e. software
* rasterization) or FALSE to indicate that the pipeline has finished after
* rendering something.
*
* When falling back to software TCL still attempt to use hardware
* rasterization.
*
* I am not sure that the cache related registers are setup correctly, but
* obviously this does work... Further investigation is needed.
*
* \author Nicolai Haehnle <prefect_@gmx.net>
*
* \todo Add immediate implementation back? Perhaps this is useful if there are
* no bugs...
*/
#include "main/glheader.h"
#include "main/state.h"
#include "main/imports.h"
#include "main/enums.h"
#include "main/macros.h"
#include "main/context.h"
#include "main/dd.h"
#include "main/simple_list.h"
#include "main/api_arrayelt.h"
#include "swrast/swrast.h"
#include "swrast_setup/swrast_setup.h"
#include "vbo/vbo.h"
#include "tnl/tnl.h"
#include "tnl/t_vp_build.h"
#include "radeon_reg.h"
#include "radeon_macros.h"
#include "r600_context.h"
#include "r600_ioctl.h"
#include "r600_state.h"
#include "r600_reg.h"
#include "r600_tex.h"
#include "r600_emit.h"
#include "r600_fragprog.h"
extern int future_hw_tcl_on;
/**
* \brief Convert a OpenGL primitive type into a R300 primitive type.
*/
int r300PrimitiveType(r300ContextPtr rmesa, int prim)
{
switch (prim & PRIM_MODE_MASK) {
case GL_POINTS:
return R300_VAP_VF_CNTL__PRIM_POINTS;
break;
case GL_LINES:
return R300_VAP_VF_CNTL__PRIM_LINES;
break;
case GL_LINE_STRIP:
return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
break;
case GL_LINE_LOOP:
return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
break;
case GL_TRIANGLES:
return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
break;
case GL_TRIANGLE_STRIP:
return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
break;
case GL_TRIANGLE_FAN:
return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
break;
case GL_QUADS:
return R300_VAP_VF_CNTL__PRIM_QUADS;
break;
case GL_QUAD_STRIP:
return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
break;
case GL_POLYGON:
return R300_VAP_VF_CNTL__PRIM_POLYGON;
break;
default:
assert(0);
return -1;
break;
}
}
int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
{
int verts_off = 0;
switch (prim & PRIM_MODE_MASK) {
case GL_POINTS:
verts_off = 0;
break;
case GL_LINES:
verts_off = num_verts % 2;
break;
case GL_LINE_STRIP:
if (num_verts < 2)
verts_off = num_verts;
break;
case GL_LINE_LOOP:
if (num_verts < 2)
verts_off = num_verts;
break;
case GL_TRIANGLES:
verts_off = num_verts % 3;
break;
case GL_TRIANGLE_STRIP:
if (num_verts < 3)
verts_off = num_verts;
break;
case GL_TRIANGLE_FAN:
if (num_verts < 3)
verts_off = num_verts;
break;
case GL_QUADS:
verts_off = num_verts % 4;
break;
case GL_QUAD_STRIP:
if (num_verts < 4)
verts_off = num_verts;
else
verts_off = num_verts % 2;
break;
case GL_POLYGON:
if (num_verts < 3)
verts_off = num_verts;
break;
default:
assert(0);
return -1;
break;
}
return num_verts - verts_off;
}
static void r300EmitElts(GLcontext * ctx, void *elts, unsigned long n_elts)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
void *out;
radeonAllocDmaRegion(&rmesa->radeon, &rmesa->radeon.tcl.elt_dma_bo,
&rmesa->radeon.tcl.elt_dma_offset, n_elts * 4, 4);
radeon_bo_map(rmesa->radeon.tcl.elt_dma_bo, 1);
out = rmesa->radeon.tcl.elt_dma_bo->ptr + rmesa->radeon.tcl.elt_dma_offset;
memcpy(out, elts, n_elts * 4);
radeon_bo_unmap(rmesa->radeon.tcl.elt_dma_bo);
}
static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type)
{
BATCH_LOCALS(&rmesa->radeon);
if (vertex_count > 0) {
BEGIN_BATCH(10);
OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
((vertex_count + 0) << 16) |
type |
R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
if (!rmesa->radeon.radeonScreen->kernel_mm) {
OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
(R300_VAP_PORT_IDX0 >> 2));
OUT_BATCH_RELOC(rmesa->radeon.tcl.elt_dma_offset,
rmesa->radeon.tcl.elt_dma_bo,
rmesa->radeon.tcl.elt_dma_offset,
RADEON_GEM_DOMAIN_GTT, 0, 0);
OUT_BATCH(vertex_count);
} else {
OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
(R300_VAP_PORT_IDX0 >> 2));
OUT_BATCH(rmesa->radeon.tcl.elt_dma_offset);
OUT_BATCH(vertex_count);
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.elt_dma_bo,
RADEON_GEM_DOMAIN_GTT, 0, 0);
}
END_BATCH();
}
}
static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
{
BATCH_LOCALS(&rmesa->radeon);
uint32_t voffset;
int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
int i;
if (RADEON_DEBUG & DEBUG_VERTS)
fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
offset);
if (!rmesa->radeon.radeonScreen->kernel_mm) {
BEGIN_BATCH(sz+2+(nr * 2));
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
OUT_BATCH(nr);
for (i = 0; i + 1 < nr; i += 2) {
OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
(rmesa->radeon.tcl.aos[i].stride << 8) |
(rmesa->radeon.tcl.aos[i + 1].components << 16) |
(rmesa->radeon.tcl.aos[i + 1].stride << 24));
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
OUT_BATCH_RELOC(voffset,
rmesa->radeon.tcl.aos[i].bo,
voffset,
RADEON_GEM_DOMAIN_GTT,
0, 0);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
OUT_BATCH_RELOC(voffset,
rmesa->radeon.tcl.aos[i+1].bo,
voffset,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
if (nr & 1) {
OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
(rmesa->radeon.tcl.aos[nr - 1].stride << 8));
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
OUT_BATCH_RELOC(voffset,
rmesa->radeon.tcl.aos[nr - 1].bo,
voffset,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
END_BATCH();
} else {
BEGIN_BATCH(sz+2+(nr * 2));
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
OUT_BATCH(nr);
for (i = 0; i + 1 < nr; i += 2) {
OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
(rmesa->radeon.tcl.aos[i].stride << 8) |
(rmesa->radeon.tcl.aos[i + 1].components << 16) |
(rmesa->radeon.tcl.aos[i + 1].stride << 24));
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
OUT_BATCH(voffset);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
OUT_BATCH(voffset);
}
if (nr & 1) {
OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
(rmesa->radeon.tcl.aos[nr - 1].stride << 8));
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
OUT_BATCH(voffset);
}
for (i = 0; i + 1 < nr; i += 2) {
voffset = rmesa->radeon.tcl.aos[i + 0].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+0].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
voffset = rmesa->radeon.tcl.aos[i + 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[i+1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
if (nr & 1) {
voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
rmesa->radeon.tcl.aos[nr-1].bo,
RADEON_GEM_DOMAIN_GTT,
0, 0);
}
END_BATCH();
}
}
static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
{
BATCH_LOCALS(&rmesa->radeon);
BEGIN_BATCH(3);
OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
END_BATCH();
}
static void r300RunRenderPrimitive(r300ContextPtr rmesa, GLcontext * ctx,
int start, int end, int prim)
{
int type, num_verts;
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *vb = &tnl->vb;
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
if (type < 0 || num_verts <= 0)
return;
/* Make space for at least 64 dwords.
* This is supposed to ensure that we can get all rendering
* commands into a single command buffer.
*/
rcommonEnsureCmdBufSpace(&rmesa->radeon, 64, __FUNCTION__);
if (vb->Elts) {
if (num_verts > 65535) {
/* not implemented yet */
WARN_ONCE("Too many elts\n");
return;
}
/* Note: The following is incorrect, but it's the best I can do
* without a major refactoring of how DMA memory is handled.
* The problem: Ensuring that both vertex arrays *and* index
* arrays are at the right position, and then ensuring that
* the LOAD_VBPNTR, DRAW_INDX and INDX_BUFFER packets are emitted
* at once.
*
* So why is the following incorrect? Well, it seems like
* allocating the index array might actually evict the vertex
* arrays. *sigh*
*/
r300EmitElts(ctx, vb->Elts, num_verts);
r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
r300FireEB(rmesa, num_verts, type);
} else {
r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start);
r300FireAOS(rmesa, num_verts, type);
}
COMMIT_BATCH();
}
static GLboolean r300RunRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
int i;
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *vb = &tnl->vb;
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s\n", __FUNCTION__);
r300UpdateShaders(rmesa);
if (r300EmitArrays(ctx))
return GL_TRUE;
r300UpdateShaderStates(rmesa);
r300EmitCacheFlush(rmesa);
radeonEmitState(&rmesa->radeon);
for (i = 0; i < vb->PrimitiveCount; i++) {
GLuint prim = _tnl_translate_prim(&vb->Primitive[i]);
GLuint start = vb->Primitive[i].start;
GLuint end = vb->Primitive[i].start + vb->Primitive[i].count;
r300RunRenderPrimitive(rmesa, ctx, start, end, prim);
}
r300EmitCacheFlush(rmesa);
radeonReleaseArrays(ctx, ~0);
return GL_FALSE;
}
#define FALLBACK_IF(expr) \
do { \
if (expr) { \
if (1 || RADEON_DEBUG & DEBUG_FALLBACKS) \
WARN_ONCE("Software fallback:%s\n", \
#expr); \
return R300_FALLBACK_RAST; \
} \
} while(0)
static int r300Fallback(GLcontext * ctx)
{
r300ContextPtr r300 = R300_CONTEXT(ctx);
const unsigned back = ctx->Stencil._BackFace;
FALLBACK_IF(r300->radeon.Fallback);
/* Do we need to use new-style shaders?
* Also is there a better way to do this? */
if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
struct r500_fragment_program *fp = (struct r500_fragment_program *)
(char *)ctx->FragmentProgram._Current;
if (fp) {
if (!fp->translated) {
r500TranslateFragmentShader(r300, fp);
FALLBACK_IF(!fp->translated);
}
}
} else {
struct r300_fragment_program *fp = (struct r300_fragment_program *)
(char *)ctx->FragmentProgram._Current;
if (fp) {
if (!fp->translated) {
r300TranslateFragmentShader(r300, fp);
FALLBACK_IF(!fp->translated);
}
}
}
FALLBACK_IF(ctx->RenderMode != GL_RENDER);
/* If GL_EXT_stencil_two_side is disabled, this fallback check can
* be removed.
*/
FALLBACK_IF(ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
|| ctx->Stencil.ValueMask[0] !=
ctx->Stencil.ValueMask[back]
|| ctx->Stencil.WriteMask[0] !=
ctx->Stencil.WriteMask[back]);
if (ctx->Extensions.NV_point_sprite || ctx->Extensions.ARB_point_sprite)
FALLBACK_IF(ctx->Point.PointSprite);
if (!r300->disable_lowimpact_fallback) {
FALLBACK_IF(ctx->Polygon.StippleFlag);
FALLBACK_IF(ctx->Multisample._Enabled);
FALLBACK_IF(ctx->Line.StippleFlag);
FALLBACK_IF(ctx->Line.SmoothFlag);
FALLBACK_IF(ctx->Point.SmoothFlag);
}
return R300_FALLBACK_NONE;
}
static GLboolean r300RunNonTCLRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s\n", __FUNCTION__);
if (r300Fallback(ctx) >= R300_FALLBACK_RAST)
return GL_TRUE;
if (!(rmesa->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL))
return GL_TRUE;
if (!r300ValidateBuffers(ctx))
return GL_TRUE;
return r300RunRender(ctx, stage);
}
static GLboolean r300RunTCLRender(GLcontext * ctx,
struct tnl_pipeline_stage *stage)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program *vp;
hw_tcl_on = future_hw_tcl_on;
if (RADEON_DEBUG & DEBUG_PRIMS)
fprintf(stderr, "%s\n", __FUNCTION__);
if (hw_tcl_on == GL_FALSE)
return GL_TRUE;
if (r300Fallback(ctx) >= R300_FALLBACK_TCL) {
hw_tcl_on = GL_FALSE;
return GL_TRUE;
}
if (!r300ValidateBuffers(ctx))
return GL_TRUE;
r300UpdateShaders(rmesa);
vp = (struct r300_vertex_program *)CURRENT_VERTEX_SHADER(ctx);
if (vp->native == GL_FALSE) {
hw_tcl_on = GL_FALSE;
return GL_TRUE;
}
return r300RunRender(ctx, stage);
}
const struct tnl_pipeline_stage _r300_render_stage = {
"r300 Hardware Rasterization",
NULL,
NULL,
NULL,
NULL,
r300RunNonTCLRender
};
const struct tnl_pipeline_stage _r300_tcl_stage = {
"r300 Hardware Transform, Clipping and Lighting",
NULL,
NULL,
NULL,
NULL,
r300RunTCLRender
};

View File

@ -0,0 +1,93 @@
#include "main/glheader.h"
#include "shader/program.h"
#include "tnl/tnl.h"
#include "r600_context.h"
#include "r600_fragprog.h"
static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
GLuint id)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program_cont *vp;
struct r300_fragment_program *r300_fp;
struct r500_fragment_program *r500_fp;
switch (target) {
case GL_VERTEX_STATE_PROGRAM_NV:
case GL_VERTEX_PROGRAM_ARB:
vp = CALLOC_STRUCT(r300_vertex_program_cont);
return _mesa_init_vertex_program(ctx, &vp->mesa_program,
target, id);
case GL_FRAGMENT_PROGRAM_ARB:
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
r500_fp = CALLOC_STRUCT(r500_fragment_program);
r500_fp->ctx = ctx;
return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
target, id);
} else {
r300_fp = CALLOC_STRUCT(r300_fragment_program);
return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
target, id);
}
case GL_FRAGMENT_PROGRAM_NV:
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
r500_fp = CALLOC_STRUCT(r500_fragment_program);
return _mesa_init_fragment_program(ctx, &r500_fp->mesa_program,
target, id);
} else {
r300_fp = CALLOC_STRUCT(r300_fragment_program);
return _mesa_init_fragment_program(ctx, &r300_fp->mesa_program,
target, id);
}
default:
_mesa_problem(ctx, "Bad target in r300NewProgram");
}
return NULL;
}
static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
{
_mesa_delete_program(ctx, prog);
}
static void
r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct r300_vertex_program_cont *vp = (void *)prog;
struct r300_fragment_program *r300_fp = (struct r300_fragment_program *)prog;
struct r500_fragment_program *r500_fp = (struct r500_fragment_program *)prog;
switch (target) {
case GL_VERTEX_PROGRAM_ARB:
vp->progs = NULL;
break;
case GL_FRAGMENT_PROGRAM_ARB:
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
r500_fp->translated = GL_FALSE;
else
r300_fp->translated = GL_FALSE;
break;
}
/* need this for tcl fallbacks */
_tnl_program_string(ctx, target, prog);
}
static GLboolean
r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
{
return GL_TRUE;
}
void r300InitShaderFuncs(struct dd_function_table *functions)
{
functions->NewProgram = r300NewProgram;
functions->DeleteProgram = r300DeleteProgram;
functions->ProgramStringNotify = r300ProgramStringNotify;
functions->IsProgramNative = r300IsProgramNative;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,65 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/*
* Authors:
* Nicolai Haehnle <prefect_@gmx.net>
*/
#ifndef __R600_STATE_H__
#define __R600_STATE_H__
#include "r600_context.h"
#define R300_NEWPRIM( rmesa ) \
do { \
if ( rmesa->radeon.dma.flush ) \
rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
} while (0)
#define R300_STATECHANGE(r300, atom) \
do { \
R300_NEWPRIM(r300); \
r300->hw.atom.dirty = GL_TRUE; \
r300->radeon.hw.is_dirty = GL_TRUE; \
} while(0)
// r300_state.c
extern int future_hw_tcl_on;
void _tnl_UpdateFixedFunctionProgram (GLcontext * ctx);
void r300UpdateViewportOffset (GLcontext * ctx);
void r300UpdateDrawBuffer (GLcontext * ctx);
void r300UpdateStateParameters (GLcontext * ctx, GLuint new_state);
void r300UpdateShaders (r300ContextPtr rmesa);
void r300UpdateShaderStates (r300ContextPtr rmesa);
void r300InitState (r300ContextPtr r300);
void r300UpdateClipPlanes (GLcontext * ctx);
void r300InitStateFuncs (struct dd_function_table *functions);
#endif /* __R300_STATE_H__ */

View File

@ -0,0 +1,722 @@
/**************************************************************************
Copyright (C) 2007 Dave Airlie
All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/*
* Authors:
* Dave Airlie <airlied@linux.ie>
* Maciej Cencora <m.cencora@gmail.com>
*/
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "r600_swtcl.h"
#include "r600_emit.h"
#include "r600_tex.h"
#define EMIT_ATTR( ATTR, STYLE ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
#define EMIT_PAD( N ) \
do { \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
rmesa->radeon.swtcl.vertex_attr_count++; \
} while (0)
#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask) \
do { \
attrs[num_attrs].attr = (_attr); \
attrs[num_attrs].format = (_format); \
attrs[num_attrs].dst_loc = (_dst_loc); \
attrs[num_attrs].swizzle = (_swizzle); \
attrs[num_attrs].write_mask = (_write_mask); \
++num_attrs; \
} while (0)
static void r300SwtclVAPSetup(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
{
r300ContextPtr rmesa = R300_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs;
int vte = 0;
int i, j, reg_count;
uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
vir0[i] = vir1[i] = 0;
for (i = 0, j = 0; i < rmesa->radeon.swtcl.vertex_attr_count; ++i) {
int tmp, data_format;
switch (attrs[i].format) {
case EMIT_1F:
data_format = R300_DATA_TYPE_FLOAT_1;
break;
case EMIT_2F:
data_format = R300_DATA_TYPE_FLOAT_2;
break;
case EMIT_3F:
data_format = R300_DATA_TYPE_FLOAT_3;
break;
case EMIT_4F:
data_format = R300_DATA_TYPE_FLOAT_4;
break;
case EMIT_4UB_4F_RGBA:
case EMIT_4UB_4F_ABGR:
data_format = R300_DATA_TYPE_BYTE | R300_NORMALIZE;
break;
default:
fprintf(stderr, "%s: Invalid data format type", __FUNCTION__);
_mesa_exit(-1);
break;
}
tmp = data_format | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
if (i % 2 == 0) {
vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
} else {
vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
++j;
}
}
reg_count = (rmesa->radeon.swtcl.vertex_attr_count + 1) >> 1;
if (rmesa->radeon.swtcl.vertex_attr_count % 2 != 0) {
vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
} else {
vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
}
R300_STATECHANGE(rmesa, vir[0]);
R300_STATECHANGE(rmesa, vir[1]);
R300_STATECHANGE(rmesa, vof);
R300_STATECHANGE(rmesa, vte);
R300_STATECHANGE(rmesa, vic);
if (rmesa->radeon.radeonScreen->kernel_mm) {
rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
} else {
((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
}
rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
vte = rmesa->hw.vte.cmd[1];
vte &= ~(R300_VTX_XY_FMT | R300_VTX_Z_FMT | R300_VTX_W0_FMT);
/* Important:
*/
if ( VB->NdcPtr != NULL ) {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->NdcPtr;
vte |= R300_VTX_XY_FMT | R300_VTX_Z_FMT;
}
else {
VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
vte |= R300_VTX_W0_FMT;
}
assert( VB->AttribPtr[VERT_ATTRIB_POS] != NULL );
rmesa->hw.vte.cmd[1] = vte;
rmesa->hw.vte.cmd[2] = rmesa->radeon.swtcl.vertex_size;
}
static void r300SetVertexFormat( GLcontext *ctx )
{
r300ContextPtr rmesa = R300_CONTEXT( ctx );
TNLcontext *tnl = TNL_CONTEXT(ctx);
struct vertex_buffer *VB = &tnl->vb;
int fog_id = -1;
GLuint InputsRead = 0;
GLuint OutputsWritten = 0;
int num_attrs = 0;
struct vertex_attribute *attrs = rmesa->swtcl.vert_attrs;
rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
rmesa->radeon.swtcl.vertex_attr_count = 0;
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS)) {
InputsRead |= 1 << VERT_ATTRIB_POS;
OutputsWritten |= 1 << VERT_RESULT_HPOS;
EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
ADD_ATTR(VERT_ATTRIB_POS, EMIT_4F, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW);
rmesa->swtcl.coloroffset = 4;
}
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR0)) {
InputsRead |= 1 << VERT_ATTRIB_COLOR0;
OutputsWritten |= 1 << VERT_RESULT_COL0;
#if MESA_LITTLE_ENDIAN
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW);
#else
EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
ADD_ATTR(VERT_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW);
#endif
}
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_COLOR1 )) {
GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
InputsRead |= 1 << VERT_ATTRIB_COLOR1;
OutputsWritten |= 1 << VERT_RESULT_COL1;
#if MESA_LITTLE_ENDIAN
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA, SWTCL_OVM_COLOR1, swiz, MASK_XYZW);
#else
EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
ADD_ATTR(VERT_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR, SWTCL_OVM_COLOR1, swiz, MASK_XYZW);
#endif
rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
}
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
OutputsWritten |= 1 << VERT_RESULT_PSIZ;
EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
ADD_ATTR(VERT_ATTRIB_POINT_SIZE, EMIT_1F, SWTCL_OVM_POINT_SIZE, swiz, MASK_X);
}
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_FOG)) {
/* find first free tex coord slot */
if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
int i;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
fog_id = i;
break;
}
}
} else {
fog_id = 0;
}
if (fog_id == -1) {
fprintf(stderr, "\tout of free texcoords to do fog\n");
_mesa_exit(-1);
}
InputsRead |= 1 << VERT_ATTRIB_FOG;
OutputsWritten |= 1 << VERT_RESULT_FOGC;
GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F );
ADD_ATTR(VERT_ATTRIB_FOG, EMIT_1F, SWTCL_OVM_TEX(fog_id), swiz, MASK_X);
}
if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
int i;
GLuint swiz, mask, format;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
switch (VB->TexCoordPtr[i]->size) {
case 1:
case 2:
format = EMIT_2F;
swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
mask = MASK_X | MASK_Y;
break;
case 3:
format = EMIT_3F;
swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
mask = MASK_X | MASK_Y | MASK_Z;
break;
case 4:
format = EMIT_4F;
swiz = SWIZZLE_XYZW;
mask = MASK_XYZW;
break;
default:
continue;
}
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
ADD_ATTR(VERT_ATTRIB_TEX0 + i, format, SWTCL_OVM_TEX(i), swiz, mask);
}
}
}
/* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) {
int first_free_tex = -1;
if (fog_id >= 0) {
first_free_tex = fog_id+1;
} else {
if (RENDERINPUTS_TEST_RANGE(tnl->render_inputs_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
int i;
for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
if (!RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX(i) )) {
first_free_tex = i;
break;
}
}
} else {
first_free_tex = 0;
}
}
if (first_free_tex == -1) {
fprintf(stderr, "\tout of free texcoords to write w pos\n");
_mesa_exit(-1);
}
InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
EMIT_ATTR( _TNL_ATTRIB_TEX(first_free_tex), EMIT_4F );
ADD_ATTR(VERT_ATTRIB_TEX0 + first_free_tex, EMIT_4F, SWTCL_OVM_TEX(first_free_tex), SWIZZLE_XYZW, MASK_XYZW);
}
R300_NEWPRIM(rmesa);
r300SwtclVAPSetup(ctx, InputsRead, OutputsWritten);
rmesa->radeon.swtcl.vertex_size =
_tnl_install_attrs( ctx,
rmesa->radeon.swtcl.vertex_attrs,
rmesa->radeon.swtcl.vertex_attr_count,
NULL, 0 );
rmesa->radeon.swtcl.vertex_size /= 4;
RENDERINPUTS_COPY(rmesa->state.render_inputs_bitset, tnl->render_inputs_bitset);
}
static GLuint reduced_prim[] = {
GL_POINTS,
GL_LINES,
GL_LINES,
GL_LINES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
GL_TRIANGLES,
};
static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
static void r300RenderPrimitive( GLcontext *ctx, GLenum prim );
/***********************************************************************
* Emit primitives as inline vertices *
***********************************************************************/
#define HAVE_POINTS 1
#define HAVE_LINES 1
#define HAVE_LINE_STRIPS 1
#define HAVE_TRIANGLES 1
#define HAVE_TRI_STRIPS 1
#define HAVE_TRI_STRIP_1 0
#define HAVE_TRI_FANS 1
#define HAVE_QUADS 0
#define HAVE_QUAD_STRIPS 0
#define HAVE_POLYGONS 1
#define HAVE_ELTS 1
#undef LOCAL_VARS
#undef ALLOC_VERTS
#define CTX_ARG r300ContextPtr rmesa
#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
#define ALLOC_VERTS( n, size ) rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 )
#define LOCAL_VARS \
r300ContextPtr rmesa = R300_CONTEXT(ctx); \
const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
#define VERTEX r300Vertex
#undef TAG
#define TAG(x) r300_##x
#include "tnl_dd/t_dd_triemit.h"
/***********************************************************************
* Macros for t_dd_tritmp.h to draw basic primitives *
***********************************************************************/
#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d )
#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c )
#define LINE( a, b ) r300_line( rmesa, a, b )
#define POINT( a ) r300_point( rmesa, a )
/***********************************************************************
* Build render functions from dd templates *
***********************************************************************/
#define R300_TWOSIDE_BIT 0x01
#define R300_UNFILLED_BIT 0x02
#define R300_MAX_TRIFUNC 0x04
static struct {
tnl_points_func points;
tnl_line_func line;
tnl_triangle_func triangle;
tnl_quad_func quad;
} rast_tab[R300_MAX_TRIFUNC];
#define DO_FALLBACK 0
#define DO_UNFILLED (IND & R300_UNFILLED_BIT)
#define DO_TWOSIDE (IND & R300_TWOSIDE_BIT)
#define DO_FLAT 0
#define DO_OFFSET 0
#define DO_TRI 1
#define DO_QUAD 1
#define DO_LINE 1
#define DO_POINTS 1
#define DO_FULL_QUAD 1
#define HAVE_RGBA 1
#define HAVE_SPEC 1
#define HAVE_BACK_COLORS 0
#define HAVE_HW_FLATSHADE 1
#define TAB rast_tab
#define DEPTH_SCALE 1.0
#define UNFILLED_TRI unfilled_tri
#define UNFILLED_QUAD unfilled_quad
#define VERT_X(_v) _v->v.x
#define VERT_Y(_v) _v->v.y
#define VERT_Z(_v) _v->v.z
#define AREA_IS_CCW( a ) (a < 0)
#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
#define VERT_SET_RGBA( v, c ) \
do { \
r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
} while (0)
#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
#define VERT_SET_SPEC( v0, c ) \
do { \
if (specoffset) { \
UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
} \
} while (0)
#define VERT_COPY_SPEC( v0, v1 ) \
do { \
if (specoffset) { \
v0->v.specular.red = v1->v.specular.red; \
v0->v.specular.green = v1->v.specular.green; \
v0->v.specular.blue = v1->v.specular.blue; \
} \
} while (0)
#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
#undef LOCAL_VARS
#undef TAG
#undef INIT
#define LOCAL_VARS(n) \
r300ContextPtr rmesa = R300_CONTEXT(ctx); \
GLuint color[n] = { 0, }, spec[n] = { 0, }; \
GLuint coloroffset = rmesa->swtcl.coloroffset; \
GLuint specoffset = rmesa->swtcl.specoffset; \
(void) color; (void) spec; (void) coloroffset; (void) specoffset;
/***********************************************************************
* Helpers for rendering unfilled primitives *
***********************************************************************/
#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
#undef TAG
#define TAG(x) x
#include "tnl_dd/t_dd_unfilled.h"
#undef IND
/***********************************************************************
* Generate GL render functions *
***********************************************************************/
#define IND (0)
#define TAG(x) x
#include "tnl_dd/t_dd_tritmp.h"
#define IND (R300_TWOSIDE_BIT)
#define TAG(x) x##_twoside
#include "tnl_dd/t_dd_tritmp.h"
#define IND (R300_UNFILLED_BIT)
#define TAG(x) x##_unfilled
#include "tnl_dd/t_dd_tritmp.h"
#define IND (R300_TWOSIDE_BIT|R300_UNFILLED_BIT)
#define TAG(x) x##_twoside_unfilled
#include "tnl_dd/t_dd_tritmp.h"
static void init_rast_tab( void )
{
init();
init_twoside();
init_unfilled();
init_twoside_unfilled();
}
/**********************************************************************/
/* Render unclipped begin/end objects */
/**********************************************************************/
#define RENDER_POINTS( start, count ) \
for ( ; start < count ; start++) \
r300_point( rmesa, VERT(start) )
#define RENDER_LINE( v0, v1 ) \
r300_line( rmesa, VERT(v0), VERT(v1) )
#define RENDER_TRI( v0, v1, v2 ) \
r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
#define RENDER_QUAD( v0, v1, v2, v3 ) \
r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
#define INIT(x) do { \
r300RenderPrimitive( ctx, x ); \
} while (0)
#undef LOCAL_VARS
#define LOCAL_VARS \
r300ContextPtr rmesa = R300_CONTEXT(ctx); \
const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \
const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
const GLboolean stipple = ctx->Line.StippleFlag; \
(void) elt; (void) stipple;
#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx );
#define RESET_OCCLUSION
#define PRESERVE_VB_DEFS
#define ELT(x) (x)
#define TAG(x) r300_##x##_verts
#include "tnl/t_vb_rendertmp.h"
#undef ELT
#undef TAG
#define TAG(x) r300_##x##_elts
#define ELT(x) elt[x]
#include "tnl/t_vb_rendertmp.h"
/**********************************************************************/
/* Choose render functions */
/**********************************************************************/
static void r300ChooseRenderState( GLcontext *ctx )
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r300ContextPtr rmesa = R300_CONTEXT(ctx);
GLuint index = 0;
GLuint flags = ctx->_TriangleCaps;
if (flags & DD_TRI_LIGHT_TWOSIDE) index |= R300_TWOSIDE_BIT;
if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT;
if (index != rmesa->radeon.swtcl.RenderIndex) {
tnl->Driver.Render.Points = rast_tab[index].points;
tnl->Driver.Render.Line = rast_tab[index].line;
tnl->Driver.Render.ClippedLine = rast_tab[index].line;
tnl->Driver.Render.Triangle = rast_tab[index].triangle;
tnl->Driver.Render.Quad = rast_tab[index].quad;
if (index == 0) {
tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
} else {
tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
}
rmesa->radeon.swtcl.RenderIndex = index;
}
}
static void r300RenderStart(GLcontext *ctx)
{
r300ContextPtr rmesa = R300_CONTEXT( ctx );
r300ChooseRenderState(ctx);
r300SetVertexFormat(ctx);
r300ValidateBuffers(ctx);
r300UpdateShaders(rmesa);
r300UpdateShaderStates(rmesa);
r300EmitCacheFlush(rmesa);
/* investigate if we can put back flush optimisation if needed */
if (rmesa->radeon.dma.flush != NULL) {
rmesa->radeon.dma.flush(ctx);
}
}
static void r300RenderFinish(GLcontext *ctx)
{
}
static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
R300_NEWPRIM( rmesa );
rmesa->radeon.swtcl.hw_primitive = hwprim;
}
}
static void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
rmesa->radeon.swtcl.render_primitive = prim;
if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
return;
r300RasterPrimitive( ctx, reduced_prim[prim] );
}
static void r300ResetLineStipple(GLcontext *ctx)
{
}
void r300InitSwtcl(GLcontext *ctx)
{
TNLcontext *tnl = TNL_CONTEXT(ctx);
r300ContextPtr rmesa = R300_CONTEXT(ctx);
static int firsttime = 1;
if (firsttime) {
init_rast_tab();
firsttime = 0;
}
tnl->Driver.Render.Start = r300RenderStart;
tnl->Driver.Render.Finish = r300RenderFinish;
tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
tnl->Driver.Render.CopyPV = _tnl_copy_pv;
tnl->Driver.Render.Interp = _tnl_interp;
/* FIXME: what are these numbers? */
_tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
48 * sizeof(GLfloat) );
rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
rmesa->radeon.swtcl.RenderIndex = ~0;
rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
rmesa->radeon.swtcl.hw_primitive = 0;
_tnl_invalidate_vertex_state( ctx, ~0 );
_tnl_invalidate_vertices( ctx, ~0 );
_tnl_need_projected_coords( ctx, GL_FALSE );
r300ChooseRenderState(ctx);
}
void r300DestroySwtcl(GLcontext *ctx)
{
}
static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
{
BATCH_LOCALS(&rmesa->radeon);
if (RADEON_DEBUG & DEBUG_VERTS)
fprintf(stderr, "%s: vertex_size %d, offset 0x%x \n",
__FUNCTION__, vertex_size, offset);
BEGIN_BATCH(7);
OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
OUT_BATCH(1);
OUT_BATCH(vertex_size | (vertex_size << 8));
OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
END_BATCH();
}
static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
{
BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
type = r300PrimitiveType(rmesa, primitive);
num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
BEGIN_BATCH(3);
OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
END_BATCH();
}
void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
rcommonEnsureCmdBufSpace(&rmesa->radeon,
rmesa->radeon.hw.max_state_size + (12*sizeof(int)),
__FUNCTION__);
radeonEmitState(&rmesa->radeon);
r300EmitVertexAOS(rmesa,
rmesa->radeon.swtcl.vertex_size,
rmesa->radeon.dma.current,
current_offset);
r300EmitVbufPrim(rmesa,
rmesa->radeon.swtcl.hw_primitive,
rmesa->radeon.swtcl.numverts);
r300EmitCacheFlush(rmesa);
COMMIT_BATCH();
}

View File

@ -0,0 +1,62 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com> - original r200 code
* Dave Airlie <airlied@linux.ie>
*/
#ifndef __R600_SWTCL_H__
#define __R600_SWTCL_H__
#include "main/mtypes.h"
#include "swrast/swrast.h"
#include "r600_context.h"
#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
#define MASK_X R300_WRITE_ENA_X
#define MASK_Y R300_WRITE_ENA_Y
#define MASK_Z R300_WRITE_ENA_Z
#define MASK_W R300_WRITE_ENA_W
/*
* Here are definitions of OVM locations of vertex attributes for non TCL hw
*/
#define SWTCL_OVM_POS 0
#define SWTCL_OVM_COLOR0 2
#define SWTCL_OVM_COLOR1 3
#define SWTCL_OVM_TEX(n) ((n) + 6)
#define SWTCL_OVM_POINT_SIZE 15
extern void r300InitSwtcl( GLcontext *ctx );
extern void r300DestroySwtcl( GLcontext *ctx );
extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
#endif

View File

@ -0,0 +1,347 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
*/
#include "main/glheader.h"
#include "main/imports.h"
#include "main/colormac.h"
#include "main/context.h"
#include "main/enums.h"
#include "main/image.h"
#include "main/mipmap.h"
#include "main/simple_list.h"
#include "main/texformat.h"
#include "main/texstore.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "texmem.h"
#include "r600_context.h"
#include "r600_state.h"
#include "r600_ioctl.h"
#include "radeon_mipmap_tree.h"
#include "r600_tex.h"
#include "xmlpool.h"
static unsigned int translate_wrap_mode(GLenum wrapmode)
{
switch(wrapmode) {
case GL_REPEAT: return R300_TX_REPEAT;
case GL_CLAMP: return R300_TX_CLAMP;
case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE;
case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER;
case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED;
case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED;
case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
default:
_mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__);
return 0;
}
}
/**
* Update the cached hardware registers based on the current texture wrap modes.
*
* \param t Texture object whose wrap modes are to be set
*/
static void r300UpdateTexWrap(radeonTexObjPtr t)
{
struct gl_texture_object *tObj = &t->base;
t->pp_txfilter &=
~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
if (tObj->Target != GL_TEXTURE_1D) {
t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
if (tObj->Target == GL_TEXTURE_3D)
t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
}
}
static GLuint aniso_filter(GLfloat anisotropy)
{
if (anisotropy >= 16.0) {
return R300_TX_MAX_ANISO_16_TO_1;
} else if (anisotropy >= 8.0) {
return R300_TX_MAX_ANISO_8_TO_1;
} else if (anisotropy >= 4.0) {
return R300_TX_MAX_ANISO_4_TO_1;
} else if (anisotropy >= 2.0) {
return R300_TX_MAX_ANISO_2_TO_1;
} else {
return R300_TX_MAX_ANISO_1_TO_1;
}
}
/**
* Set the texture magnification and minification modes.
*
* \param t Texture whose filter modes are to be set
* \param minf Texture minification mode
* \param magf Texture magnification mode
* \param anisotropy Maximum anisotropy level
*/
static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
{
/* Force revalidation to account for switches from/to mipmapping. */
t->validated = GL_FALSE;
t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
/* Note that EXT_texture_filter_anisotropic is extremely vague about
* how anisotropic filtering interacts with the "normal" filter modes.
* When anisotropic filtering is enabled, we override min and mag
* filter settings completely. This includes driconf's settings.
*/
if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
| R300_TX_MIN_FILTER_ANISO
| R300_TX_MIN_FILTER_MIP_LINEAR
| aniso_filter(anisotropy);
if (RADEON_DEBUG & DEBUG_TEXTURE)
fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy);
return;
}
switch (minf) {
case GL_NEAREST:
t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
break;
case GL_NEAREST_MIPMAP_NEAREST:
t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
break;
case GL_NEAREST_MIPMAP_LINEAR:
t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
break;
case GL_LINEAR_MIPMAP_NEAREST:
t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
break;
case GL_LINEAR_MIPMAP_LINEAR:
t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
break;
}
/* Note we don't have 3D mipmaps so only use the mag filter setting
* to set the 3D texture filter mode.
*/
switch (magf) {
case GL_NEAREST:
t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
break;
case GL_LINEAR:
t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
break;
}
}
static void r300SetTexBorderColor(radeonTexObjPtr t, GLubyte c[4])
{
t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
}
/**
* Changes variables and flags for a state update, which will happen at the
* next UpdateTextureState
*/
static void r300TexParameter(GLcontext * ctx, GLenum target,
struct gl_texture_object *texObj,
GLenum pname, const GLfloat * params)
{
radeonTexObj* t = radeon_tex_obj(texObj);
if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
fprintf(stderr, "%s( %s )\n", __FUNCTION__,
_mesa_lookup_enum_by_nr(pname));
}
switch (pname) {
case GL_TEXTURE_MIN_FILTER:
case GL_TEXTURE_MAG_FILTER:
case GL_TEXTURE_MAX_ANISOTROPY_EXT:
r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
break;
case GL_TEXTURE_WRAP_S:
case GL_TEXTURE_WRAP_T:
case GL_TEXTURE_WRAP_R:
r300UpdateTexWrap(t);
break;
case GL_TEXTURE_BORDER_COLOR:
r300SetTexBorderColor(t, texObj->_BorderChan);
break;
case GL_TEXTURE_BASE_LEVEL:
case GL_TEXTURE_MAX_LEVEL:
case GL_TEXTURE_MIN_LOD:
case GL_TEXTURE_MAX_LOD:
/* This isn't the most efficient solution but there doesn't appear to
* be a nice alternative. Since there's no LOD clamping,
* we just have to rely on loading the right subset of mipmap levels
* to simulate a clamped LOD.
*/
if (t->mt) {
radeon_miptree_unreference(t->mt);
t->mt = 0;
t->validated = GL_FALSE;
}
break;
case GL_DEPTH_TEXTURE_MODE:
if (!texObj->Image[0][texObj->BaseLevel])
return;
if (texObj->Image[0][texObj->BaseLevel]->TexFormat->BaseFormat
== GL_DEPTH_COMPONENT) {
r300SetDepthTexMode(texObj);
break;
} else {
/* If the texture isn't a depth texture, changing this
* state won't cause any changes to the hardware.
* Don't force a flush of texture state.
*/
return;
}
default:
return;
}
}
static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
radeonTexObj* t = radeon_tex_obj(texObj);
if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
(void *)texObj,
_mesa_lookup_enum_by_nr(texObj->Target));
}
if (rmesa) {
int i;
radeon_firevertices(&rmesa->radeon);
for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
if (rmesa->hw.textures[i] == t)
rmesa->hw.textures[i] = 0;
}
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
}
if (t->mt) {
radeon_miptree_unreference(t->mt);
t->mt = 0;
}
_mesa_delete_texture_object(ctx, texObj);
}
/**
* Allocate a new texture object.
* Called via ctx->Driver.NewTextureObject.
* Note: this function will be called during context creation to
* allocate the default texture objects.
* Fixup MaxAnisotropy according to user preference.
*/
static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
GLuint name,
GLenum target)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
if (RADEON_DEBUG & (DEBUG_STATE | DEBUG_TEXTURE)) {
fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
t, _mesa_lookup_enum_by_nr(target));
}
_mesa_initialize_texture_object(&t->base, name, target);
t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
/* Initialize hardware state */
r300UpdateTexWrap(t);
r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
r300SetTexBorderColor(t, t->base._BorderChan);
return &t->base;
}
void r300InitTextureFuncs(struct dd_function_table *functions)
{
/* Note: we only plug in the functions we implement in the driver
* since _mesa_init_driver_functions() was already called.
*/
functions->NewTextureImage = radeonNewTextureImage;
functions->FreeTexImageData = radeonFreeTexImageData;
functions->MapTexture = radeonMapTexture;
functions->UnmapTexture = radeonUnmapTexture;
functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
functions->TexImage1D = radeonTexImage1D;
functions->TexImage2D = radeonTexImage2D;
functions->TexImage3D = radeonTexImage3D;
functions->TexSubImage1D = radeonTexSubImage1D;
functions->TexSubImage2D = radeonTexSubImage2D;
functions->TexSubImage3D = radeonTexSubImage3D;
functions->GetTexImage = radeonGetTexImage;
functions->GetCompressedTexImage = radeonGetCompressedTexImage;
functions->NewTextureObject = r300NewTextureObject;
functions->DeleteTexture = r300DeleteTexture;
functions->IsTextureResident = driIsTextureResident;
functions->TexParameter = r300TexParameter;
functions->CompressedTexImage2D = radeonCompressedTexImage2D;
functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
functions->GenerateMipmap = radeonGenerateMipmap;
driInitTextureFormats();
}

View File

@ -0,0 +1,54 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/*
* Authors:
* Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef __r600_TEX_H__
#define __r600_TEX_H__
extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
__DRIdrawable *dPriv);
extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
GLint format, __DRIdrawable *dPriv);
extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
unsigned long long offset, GLint depth,
GLuint pitch);
extern GLboolean r300ValidateBuffers(GLcontext * ctx);
extern void r300InitTextureFuncs(struct dd_function_table *functions);
#endif /* __r600_TEX_H__ */

View File

@ -0,0 +1,488 @@
/*
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/**
* \file
*
* \author Keith Whitwell <keith@tungstengraphics.com>
*
* \todo Enable R300 texture tiling code?
*/
#include "main/glheader.h"
#include "main/imports.h"
#include "main/context.h"
#include "main/macros.h"
#include "main/texformat.h"
#include "main/teximage.h"
#include "main/texobj.h"
#include "main/enums.h"
#include "r600_context.h"
#include "r600_state.h"
#include "r600_ioctl.h"
#include "radeon_mipmap_tree.h"
#include "r600_tex.h"
#include "r600_reg.h"
#define VALID_FORMAT(f) ( ((f) <= MESA_FORMAT_RGBA_DXT5 \
|| ((f) >= MESA_FORMAT_RGBA_FLOAT32 && \
(f) <= MESA_FORMAT_INTENSITY_FLOAT16)) \
&& tx_table[f].flag )
#define _ASSIGN(entry, format) \
[ MESA_FORMAT_ ## entry ] = { format, 0, 1}
/*
* Note that the _REV formats are the same as the non-REV formats. This is
* because the REV and non-REV formats are identical as a byte string, but
* differ when accessed as 16-bit or 32-bit words depending on the endianness of
* the host. Since the textures are transferred to the R300 as a byte string
* (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats
* identically. -- paulus
*/
static const struct tx_table {
GLuint format, filter, flag;
} tx_table[] = {
/* *INDENT-OFF* */
#ifdef MESA_LITTLE_ENDIAN
_ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
_ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
_ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
_ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
#else
_ASSIGN(RGBA8888, R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8)),
_ASSIGN(RGBA8888_REV, R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8)),
_ASSIGN(ARGB8888, R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8)),
_ASSIGN(ARGB8888_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8)),
#endif
_ASSIGN(RGB888, R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8)),
_ASSIGN(RGB565, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
_ASSIGN(RGB565_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5)),
_ASSIGN(ARGB4444, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
_ASSIGN(ARGB4444_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4)),
_ASSIGN(ARGB1555, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
_ASSIGN(ARGB1555_REV, R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5)),
_ASSIGN(AL88, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
_ASSIGN(AL88_REV, R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8)),
_ASSIGN(RGB332, R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2)),
_ASSIGN(A8, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8)),
_ASSIGN(L8, R300_EASY_TX_FORMAT(X, X, X, ONE, X8)),
_ASSIGN(I8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
_ASSIGN(CI8, R300_EASY_TX_FORMAT(X, X, X, X, X8)),
_ASSIGN(YCBCR, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE),
_ASSIGN(YCBCR_REV, R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE),
_ASSIGN(RGB_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1)),
_ASSIGN(RGBA_DXT1, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1)),
_ASSIGN(RGBA_DXT3, R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3)),
_ASSIGN(RGBA_DXT5, R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5)),
_ASSIGN(RGBA_FLOAT32, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32)),
_ASSIGN(RGBA_FLOAT16, R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16)),
_ASSIGN(RGB_FLOAT32, 0xffffffff),
_ASSIGN(RGB_FLOAT16, 0xffffffff),
_ASSIGN(ALPHA_FLOAT32, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32)),
_ASSIGN(ALPHA_FLOAT16, R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16)),
_ASSIGN(LUMINANCE_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32)),
_ASSIGN(LUMINANCE_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16)),
_ASSIGN(LUMINANCE_ALPHA_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32)),
_ASSIGN(LUMINANCE_ALPHA_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16)),
_ASSIGN(INTENSITY_FLOAT32, R300_EASY_TX_FORMAT(X, X, X, X, FL_I32)),
_ASSIGN(INTENSITY_FLOAT16, R300_EASY_TX_FORMAT(X, X, X, X, FL_I16)),
_ASSIGN(Z16, R300_EASY_TX_FORMAT(X, X, X, X, X16)),
_ASSIGN(Z24_S8, R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8)),
_ASSIGN(Z32, R300_EASY_TX_FORMAT(X, X, X, X, X32)),
/* *INDENT-ON* */
};
#undef _ASSIGN
void r300SetDepthTexMode(struct gl_texture_object *tObj)
{
static const GLuint formats[3][3] = {
{
R300_EASY_TX_FORMAT(X, X, X, ONE, X16),
R300_EASY_TX_FORMAT(X, X, X, X, X16),
R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16),
},
{
R300_EASY_TX_FORMAT(X, X, X, ONE, X24_Y8),
R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8),
R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X24_Y8),
},
{
R300_EASY_TX_FORMAT(X, X, X, ONE, X32),
R300_EASY_TX_FORMAT(X, X, X, X, X32),
R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32),
},
};
const GLuint *format;
radeonTexObjPtr t;
if (!tObj)
return;
t = radeon_tex_obj(tObj);
switch (tObj->Image[0][tObj->BaseLevel]->TexFormat->MesaFormat) {
case MESA_FORMAT_Z16:
format = formats[0];
break;
case MESA_FORMAT_Z24_S8:
format = formats[1];
break;
case MESA_FORMAT_Z32:
format = formats[2];
break;
default:
/* Error...which should have already been caught by higher
* levels of Mesa.
*/
ASSERT(0);
return;
}
switch (tObj->DepthMode) {
case GL_LUMINANCE:
t->pp_txformat = format[0];
break;
case GL_INTENSITY:
t->pp_txformat = format[1];
break;
case GL_ALPHA:
t->pp_txformat = format[2];
break;
default:
/* Error...which should have already been caught by higher
* levels of Mesa.
*/
ASSERT(0);
return;
}
}
/**
* Compute the cached hardware register values for the given texture object.
*
* \param rmesa Context pointer
* \param t the r300 texture object
*/
static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
{
const struct gl_texture_image *firstImage;
int firstlevel = t->mt ? t->mt->firstLevel : 0;
firstImage = t->base.Image[0][firstlevel];
if (!t->image_override
&& VALID_FORMAT(firstImage->TexFormat->MesaFormat)) {
if (firstImage->TexFormat->BaseFormat == GL_DEPTH_COMPONENT) {
r300SetDepthTexMode(&t->base);
} else {
t->pp_txformat = tx_table[firstImage->TexFormat->MesaFormat].format;
}
t->pp_txfilter |= tx_table[firstImage->TexFormat->MesaFormat].filter;
} else if (!t->image_override) {
_mesa_problem(NULL, "unexpected texture format in %s",
__FUNCTION__);
return;
}
if (t->image_override && t->bo)
return;
t->pp_txsize = (((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)
| ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)
| ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)
| ((t->mt->lastLevel - t->mt->firstLevel) << R300_TX_MAX_MIP_LEVEL_SHIFT));
t->tile_bits = 0;
if (t->base.Target == GL_TEXTURE_CUBE_MAP)
t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
if (t->base.Target == GL_TEXTURE_3D)
t->pp_txformat |= R300_TX_FORMAT_3D;
if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
unsigned int align = (64 / t->mt->bpp) - 1;
t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
if (!t->image_override)
t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
}
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
if (firstImage->Width > 2048)
t->pp_txpitch |= R500_TXWIDTH_BIT11;
if (firstImage->Height > 2048)
t->pp_txpitch |= R500_TXHEIGHT_BIT11;
}
}
/**
* Ensure the given texture is ready for rendering.
*
* Mostly this means populating the texture object's mipmap tree.
*/
static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
radeonTexObj *t = radeon_tex_obj(texObj);
if (!radeon_validate_texture_miptree(ctx, texObj))
return GL_FALSE;
/* Configure the hardware registers (more precisely, the cached version
* of the hardware registers). */
setup_hardware_state(rmesa, t);
t->validated = GL_TRUE;
return GL_TRUE;
}
/**
* Ensure all enabled and complete textures are uploaded along with any buffers being used.
*/
GLboolean r300ValidateBuffers(GLcontext * ctx)
{
r300ContextPtr rmesa = R300_CONTEXT(ctx);
struct radeon_renderbuffer *rrb;
int i;
radeon_validate_reset_bos(&rmesa->radeon);
rrb = radeon_get_colorbuffer(&rmesa->radeon);
/* color buffer */
if (rrb && rrb->bo) {
radeon_validate_bo(&rmesa->radeon, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
/* depth buffer */
rrb = radeon_get_depthbuffer(&rmesa->radeon);
if (rrb && rrb->bo) {
radeon_validate_bo(&rmesa->radeon, rrb->bo,
0, RADEON_GEM_DOMAIN_VRAM);
}
for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
radeonTexObj *t;
if (!ctx->Texture.Unit[i]._ReallyEnabled)
continue;
if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
_mesa_warning(ctx,
"failed to validate texture for unit %d.\n",
i);
}
t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
if (t->image_override && t->bo)
radeon_validate_bo(&rmesa->radeon, t->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
else if (t->mt->bo)
radeon_validate_bo(&rmesa->radeon, t->mt->bo,
RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
}
if (rmesa->radeon.dma.current)
radeon_validate_bo(&rmesa->radeon, rmesa->radeon.dma.current, RADEON_GEM_DOMAIN_GTT, 0);
return radeon_revalidate_bos(ctx);
}
void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
unsigned long long offset, GLint depth, GLuint pitch)
{
r300ContextPtr rmesa = pDRICtx->driverPrivate;
struct gl_texture_object *tObj =
_mesa_lookup_texture(rmesa->radeon.glCtx, texname);
radeonTexObjPtr t = radeon_tex_obj(tObj);
uint32_t pitch_val;
if (!tObj)
return;
t->image_override = GL_TRUE;
if (!offset)
return;
t->bo = NULL;
t->override_offset = offset;
t->pp_txpitch &= (1 << 13) -1;
pitch_val = pitch;
switch (depth) {
case 32:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
t->pp_txfilter |= tx_table[2].filter;
pitch_val /= 4;
break;
case 24:
default:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
t->pp_txfilter |= tx_table[4].filter;
pitch_val /= 4;
break;
case 16:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
t->pp_txfilter |= tx_table[5].filter;
pitch_val /= 2;
break;
}
pitch_val--;
t->pp_txpitch |= pitch_val;
}
void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
{
struct gl_texture_unit *texUnit;
struct gl_texture_object *texObj;
struct gl_texture_image *texImage;
struct radeon_renderbuffer *rb;
radeon_texture_image *rImage;
radeonContextPtr radeon;
r300ContextPtr rmesa;
struct radeon_framebuffer *rfb;
radeonTexObjPtr t;
uint32_t pitch_val;
uint32_t internalFormat, type, format;
type = GL_BGRA;
format = GL_UNSIGNED_BYTE;
internalFormat = (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT ? 3 : 4);
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
rImage = get_radeon_texture_image(texImage);
t = radeon_tex_obj(texObj);
if (t == NULL) {
return;
}
radeon_update_renderbuffers(pDRICtx, dPriv);
/* back & depth buffer are useless free them right away */
rb = (void*)rfb->base.Attachment[BUFFER_DEPTH].Renderbuffer;
if (rb && rb->bo) {
radeon_bo_unref(rb->bo);
rb->bo = NULL;
}
rb = (void*)rfb->base.Attachment[BUFFER_BACK_LEFT].Renderbuffer;
if (rb && rb->bo) {
radeon_bo_unref(rb->bo);
rb->bo = NULL;
}
rb = rfb->color_rb[0];
if (rb->bo == NULL) {
/* Failed to BO for the buffer */
return;
}
_mesa_lock_texture(radeon->glCtx, texObj);
if (t->bo) {
radeon_bo_unref(t->bo);
t->bo = NULL;
}
if (rImage->bo) {
radeon_bo_unref(rImage->bo);
rImage->bo = NULL;
}
if (t->mt) {
radeon_miptree_unreference(t->mt);
t->mt = NULL;
}
if (rImage->mt) {
radeon_miptree_unreference(rImage->mt);
rImage->mt = NULL;
}
fprintf(stderr,"settexbuf %dx%d@%d %d targ %x format %x\n", rb->width, rb->height, rb->cpp, rb->pitch, target, format);
_mesa_init_teximage_fields(radeon->glCtx, target, texImage,
rb->width, rb->height, 1, 0, rb->cpp);
texImage->RowStride = rb->pitch / rb->cpp;
texImage->TexFormat = radeonChooseTextureFormat(radeon->glCtx,
internalFormat,
type, format, 0);
rImage->bo = rb->bo;
radeon_bo_ref(rImage->bo);
t->bo = rb->bo;
radeon_bo_ref(t->bo);
t->tile_bits = 0;
t->image_override = GL_TRUE;
t->override_offset = 0;
t->pp_txpitch &= (1 << 13) -1;
pitch_val = rb->pitch;
switch (rb->cpp) {
case 4:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
t->pp_txfilter |= tx_table[2].filter;
pitch_val /= 4;
break;
case 3:
default:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
t->pp_txfilter |= tx_table[4].filter;
pitch_val /= 4;
break;
case 2:
t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
t->pp_txfilter |= tx_table[5].filter;
pitch_val /= 2;
break;
}
pitch_val--;
t->pp_txsize = ((rb->width - 1) << R300_TX_WIDTHMASK_SHIFT) |
((rb->height - 1) << R300_TX_HEIGHTMASK_SHIFT);
t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
t->pp_txpitch |= pitch_val;
if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
if (rb->width > 2048)
t->pp_txpitch |= R500_TXWIDTH_BIT11;
if (rb->height > 2048)
t->pp_txpitch |= R500_TXHEIGHT_BIT11;
}
t->validated = GL_TRUE;
_mesa_unlock_texture(radeon->glCtx, texObj);
return;
}
void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
{
r300SetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,35 @@
#ifndef __R600_VERTPROG_H_
#define __R600_VERTPROG_H_
#include "r600_reg.h"
#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
(((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
| ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
| ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
| ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
| ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
| ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
(((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
| ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
| ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
| ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
| ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
| ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
| ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
#if 1
#define VSF_FLAG_X 1
#define VSF_FLAG_Y 2
#define VSF_FLAG_Z 4
#define VSF_FLAG_W 8
#define VSF_FLAG_XYZ (VSF_FLAG_X | VSF_FLAG_Y | VSF_FLAG_Z)
#define VSF_FLAG_ALL 0xf
#define VSF_FLAG_NONE 0
#endif
#endif

View File

@ -0,0 +1,719 @@
/*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "r700_fragprog.h"
#include "radeon_nqssadce.h"
#include "radeon_program_alu.h"
static void reset_srcreg(struct prog_src_register* reg)
{
_mesa_bzero(reg, sizeof(*reg));
reg->Swizzle = SWIZZLE_NOOP;
}
static struct prog_src_register shadow_ambient(struct gl_program *program, int tmu)
{
gl_state_index fail_value_tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0
};
struct prog_src_register reg = { 0, };
fail_value_tokens[2] = tmu;
reg.File = PROGRAM_STATE_VAR;
reg.Index = _mesa_add_state_reference(program->Parameters, fail_value_tokens);
reg.Swizzle = SWIZZLE_WWWW;
return reg;
}
/**
* Transform TEX, TXP, TXB, and KIL instructions in the following way:
* - premultiply texture coordinates for RECT
* - extract operand swizzles
* - introduce a temporary register when write masks are needed
*
*/
static GLboolean transform_TEX(
struct radeon_transform_context *t,
struct prog_instruction* orig_inst, void* data)
{
struct r500_fragment_program_compiler *compiler =
(struct r500_fragment_program_compiler*)data;
struct prog_instruction inst = *orig_inst;
struct prog_instruction* tgt;
GLboolean destredirect = GL_FALSE;
if (inst.Opcode != OPCODE_TEX &&
inst.Opcode != OPCODE_TXB &&
inst.Opcode != OPCODE_TXP &&
inst.Opcode != OPCODE_KIL)
return GL_FALSE;
/* ARB_shadow & EXT_shadow_funcs */
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
if (comparefunc == GL_NEVER || comparefunc == GL_ALWAYS) {
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = inst.DstReg;
if (comparefunc == GL_ALWAYS) {
tgt->SrcReg[0].File = PROGRAM_BUILTIN;
tgt->SrcReg[0].Swizzle = SWIZZLE_1111;
} else {
tgt->SrcReg[0] = shadow_ambient(t->Program, inst.TexSrcUnit);
}
return GL_TRUE;
}
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = radeonFindFreeTemporary(t);
inst.DstReg.WriteMask = WRITEMASK_XYZW;
} else if (inst.Opcode != OPCODE_KIL && inst.DstReg.File != PROGRAM_TEMPORARY) {
int tempreg = radeonFindFreeTemporary(t);
inst.DstReg.File = PROGRAM_TEMPORARY;
inst.DstReg.Index = tempreg;
inst.DstReg.WriteMask = WRITEMASK_XYZW;
destredirect = GL_TRUE;
}
if (inst.SrcReg[0].File != PROGRAM_TEMPORARY && inst.SrcReg[0].File != PROGRAM_INPUT) {
int tmpreg = radeonFindFreeTemporary(t);
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg.File = PROGRAM_TEMPORARY;
tgt->DstReg.Index = tmpreg;
tgt->SrcReg[0] = inst.SrcReg[0];
reset_srcreg(&inst.SrcReg[0]);
inst.SrcReg[0].File = PROGRAM_TEMPORARY;
inst.SrcReg[0].Index = tmpreg;
}
tgt = radeonAppendInstructions(t->Program, 1);
_mesa_copy_instructions(tgt, &inst, 1);
if (inst.Opcode != OPCODE_KIL &&
t->Program->ShadowSamplers & (1 << inst.TexSrcUnit)) {
GLuint comparefunc = GL_NEVER + compiler->fp->state.unit[inst.TexSrcUnit].texture_compare_func;
GLuint depthmode = compiler->fp->state.unit[inst.TexSrcUnit].depth_texture_mode;
int rcptemp = radeonFindFreeTemporary(t);
int pass, fail;
tgt = radeonAppendInstructions(t->Program, 3);
tgt[0].Opcode = OPCODE_RCP;
tgt[0].DstReg.File = PROGRAM_TEMPORARY;
tgt[0].DstReg.Index = rcptemp;
tgt[0].DstReg.WriteMask = WRITEMASK_W;
tgt[0].SrcReg[0] = inst.SrcReg[0];
tgt[0].SrcReg[0].Swizzle = SWIZZLE_WWWW;
tgt[1].Opcode = OPCODE_MAD;
tgt[1].DstReg = inst.DstReg;
tgt[1].DstReg.WriteMask = orig_inst->DstReg.WriteMask;
tgt[1].SrcReg[0] = inst.SrcReg[0];
tgt[1].SrcReg[0].Swizzle = SWIZZLE_ZZZZ;
tgt[1].SrcReg[1].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[1].Index = rcptemp;
tgt[1].SrcReg[1].Swizzle = SWIZZLE_WWWW;
tgt[1].SrcReg[2].File = PROGRAM_TEMPORARY;
tgt[1].SrcReg[2].Index = inst.DstReg.Index;
if (depthmode == 0) /* GL_LUMINANCE */
tgt[1].SrcReg[2].Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z);
else if (depthmode == 2) /* GL_ALPHA */
tgt[1].SrcReg[2].Swizzle = SWIZZLE_WWWW;
/* Recall that SrcReg[0] is tex, SrcReg[2] is r and:
* r < tex <=> -tex+r < 0
* r >= tex <=> not (-tex+r < 0 */
if (comparefunc == GL_LESS || comparefunc == GL_GEQUAL)
tgt[1].SrcReg[2].NegateBase = tgt[0].SrcReg[2].NegateBase ^ NEGATE_XYZW;
else
tgt[1].SrcReg[0].NegateBase = tgt[0].SrcReg[0].NegateBase ^ NEGATE_XYZW;
tgt[2].Opcode = OPCODE_CMP;
tgt[2].DstReg = orig_inst->DstReg;
tgt[2].SrcReg[0].File = PROGRAM_TEMPORARY;
tgt[2].SrcReg[0].Index = tgt[1].DstReg.Index;
if (comparefunc == GL_LESS || comparefunc == GL_GREATER) {
pass = 1;
fail = 2;
} else {
pass = 2;
fail = 1;
}
tgt[2].SrcReg[pass].File = PROGRAM_BUILTIN;
tgt[2].SrcReg[pass].Swizzle = SWIZZLE_1111;
tgt[2].SrcReg[fail] = shadow_ambient(t->Program, inst.TexSrcUnit);
} else if (destredirect) {
tgt = radeonAppendInstructions(t->Program, 1);
tgt->Opcode = OPCODE_MOV;
tgt->DstReg = orig_inst->DstReg;
tgt->SrcReg[0].File = PROGRAM_TEMPORARY;
tgt->SrcReg[0].Index = inst.DstReg.Index;
}
return GL_TRUE;
}
static void update_params(r300ContextPtr r300, struct r500_fragment_program *fp)
{
struct gl_fragment_program *mp = &fp->mesa_program;
/* Ask Mesa nicely to fill in ParameterValues for us */
if (mp->Base.Parameters)
_mesa_load_state_parameters(r300->radeon.glCtx, mp->Base.Parameters);
}
/**
* Transform the program to support fragment.position.
*
* Introduce a small fragment at the start of the program that will be
* the only code that directly reads the FRAG_ATTRIB_WPOS input.
* All other code pieces that reference that input will be rewritten
* to read from a newly allocated temporary.
*
* \todo if/when r5xx supports the radeon_program architecture, this is a
* likely candidate for code sharing.
*/
static void insert_WPOS_trailer(struct r500_fragment_program_compiler *compiler)
{
GLuint InputsRead = compiler->fp->mesa_program.Base.InputsRead;
if (!(InputsRead & FRAG_BIT_WPOS))
return;
static gl_state_index tokens[STATE_LENGTH] = {
STATE_INTERNAL, STATE_R300_WINDOW_DIMENSION, 0, 0, 0
};
struct prog_instruction *fpi;
GLuint window_index;
int i = 0;
GLuint tempregi = _mesa_find_free_register(compiler->program, PROGRAM_TEMPORARY);
_mesa_insert_instructions(compiler->program, 0, 3);
fpi = compiler->program->Instructions;
/* perspective divide */
fpi[i].Opcode = OPCODE_RCP;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_W;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_WWWW;
i++;
fpi[i].Opcode = OPCODE_MUL;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_INPUT;
fpi[i].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
fpi[i].SrcReg[0].Swizzle = SWIZZLE_XYZW;
fpi[i].SrcReg[1].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[1].Index = tempregi;
fpi[i].SrcReg[1].Swizzle = SWIZZLE_WWWW;
i++;
/* viewport transformation */
window_index = _mesa_add_state_reference(compiler->program->Parameters, tokens);
fpi[i].Opcode = OPCODE_MAD;
fpi[i].DstReg.File = PROGRAM_TEMPORARY;
fpi[i].DstReg.Index = tempregi;
fpi[i].DstReg.WriteMask = WRITEMASK_XYZ;
fpi[i].DstReg.CondMask = COND_TR;
fpi[i].SrcReg[0].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[0].Index = tempregi;
fpi[i].SrcReg[0].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
fpi[i].SrcReg[1].File = PROGRAM_STATE_VAR;
fpi[i].SrcReg[1].Index = window_index;
fpi[i].SrcReg[1].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
fpi[i].SrcReg[2].File = PROGRAM_STATE_VAR;
fpi[i].SrcReg[2].Index = window_index;
fpi[i].SrcReg[2].Swizzle =
MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ZERO);
i++;
for (; i < compiler->program->NumInstructions; ++i) {
int reg;
for (reg = 0; reg < 3; reg++) {
if (fpi[i].SrcReg[reg].File == PROGRAM_INPUT &&
fpi[i].SrcReg[reg].Index == FRAG_ATTRIB_WPOS) {
fpi[i].SrcReg[reg].File = PROGRAM_TEMPORARY;
fpi[i].SrcReg[reg].Index = tempregi;
}
}
}
}
static void nqssadce_init(struct nqssadce_state* s)
{
s->Outputs[FRAG_RESULT_COLOR].Sourced = WRITEMASK_XYZW;
s->Outputs[FRAG_RESULT_DEPTH].Sourced = WRITEMASK_W;
}
static GLboolean is_native_swizzle(GLuint opcode, struct prog_src_register reg)
{
GLuint relevant;
int i;
if (opcode == OPCODE_TEX ||
opcode == OPCODE_TXB ||
opcode == OPCODE_TXP ||
opcode == OPCODE_KIL) {
if (reg.Abs)
return GL_FALSE;
if (reg.NegateAbs)
reg.NegateBase ^= 15;
if (opcode == OPCODE_KIL) {
if (reg.Swizzle != SWIZZLE_NOOP)
return GL_FALSE;
} else {
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz == SWIZZLE_NIL) {
reg.NegateBase &= ~(1 << i);
continue;
}
if (swz >= 4)
return GL_FALSE;
}
}
if (reg.NegateBase)
return GL_FALSE;
return GL_TRUE;
} else if (opcode == OPCODE_DDX || opcode == OPCODE_DDY) {
/* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
* if it doesn't fit perfectly into a .xyzw case... */
if (reg.Swizzle == SWIZZLE_NOOP && !reg.Abs
&& !reg.NegateBase && !reg.NegateAbs)
return GL_TRUE;
return GL_FALSE;
} else {
/* ALU instructions support almost everything */
if (reg.Abs)
return GL_TRUE;
relevant = 0;
for(i = 0; i < 3; ++i) {
GLuint swz = GET_SWZ(reg.Swizzle, i);
if (swz != SWIZZLE_NIL && swz != SWIZZLE_ZERO)
relevant |= 1 << i;
}
if ((reg.NegateBase & relevant) && ((reg.NegateBase & relevant) != relevant))
return GL_FALSE;
return GL_TRUE;
}
}
/**
* Implement a MOV with a potentially non-native swizzle.
*
* The only thing we *cannot* do in an ALU instruction is per-component
* negation. Therefore, we split the MOV into two instructions when necessary.
*/
static void nqssadce_build_swizzle(struct nqssadce_state *s,
struct prog_dst_register dst, struct prog_src_register src)
{
struct prog_instruction *inst;
GLuint negatebase[2] = { 0, 0 };
int i;
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(src.Swizzle, i);
if (swz == SWIZZLE_NIL)
continue;
negatebase[GET_BIT(src.NegateBase, i)] |= 1 << i;
}
_mesa_insert_instructions(s->Program, s->IP, (negatebase[0] ? 1 : 0) + (negatebase[1] ? 1 : 0));
inst = s->Program->Instructions + s->IP;
for(i = 0; i <= 1; ++i) {
if (!negatebase[i])
continue;
inst->Opcode = OPCODE_MOV;
inst->DstReg = dst;
inst->DstReg.WriteMask = negatebase[i];
inst->SrcReg[0] = src;
inst++;
s->IP++;
}
}
static GLuint build_dtm(GLuint depthmode)
{
switch(depthmode) {
default:
case GL_LUMINANCE: return 0;
case GL_INTENSITY: return 1;
case GL_ALPHA: return 2;
}
}
static GLuint build_func(GLuint comparefunc)
{
return comparefunc - GL_NEVER;
}
/**
* Collect all external state that is relevant for compiling the given
* fragment program.
*/
static void build_state(
r300ContextPtr r300,
struct r500_fragment_program *fp,
struct r500_fragment_program_external_state *state)
{
int unit;
_mesa_bzero(state, sizeof(*state));
for(unit = 0; unit < 16; ++unit) {
if (fp->mesa_program.Base.ShadowSamplers & (1 << unit)) {
struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
state->unit[unit].depth_texture_mode = build_dtm(tex->DepthMode);
state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
}
}
}
static void dump_program(struct r500_fragment_program_code *code);
void r500TranslateFragmentShader(r300ContextPtr r300,
struct r500_fragment_program *fp)
{
struct r500_fragment_program_external_state state;
build_state(r300, fp, &state);
if (_mesa_memcmp(&fp->state, &state, sizeof(state))) {
/* TODO: cache compiled programs */
fp->translated = GL_FALSE;
_mesa_memcpy(&fp->state, &state, sizeof(state));
}
if (!fp->translated) {
struct r500_fragment_program_compiler compiler;
compiler.r300 = r300;
compiler.fp = fp;
compiler.code = &fp->code;
compiler.program = _mesa_clone_program(r300->radeon.glCtx, &fp->mesa_program.Base);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: Initial program:\n");
_mesa_print_program(compiler.program);
}
insert_WPOS_trailer(&compiler);
struct radeon_program_transformation transformations[] = {
{ &transform_TEX, &compiler },
{ &radeonTransformALU, 0 },
{ &radeonTransformDeriv, 0 },
{ &radeonTransformTrigScale, 0 }
};
radeonLocalTransform(r300->radeon.glCtx, compiler.program,
4, transformations);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after native rewrite:\n");
_mesa_print_program(compiler.program);
}
struct radeon_nqssadce_descr nqssadce = {
.Init = &nqssadce_init,
.IsNativeSwizzle = &is_native_swizzle,
.BuildSwizzle = &nqssadce_build_swizzle,
.RewriteDepthOut = GL_TRUE
};
radeonNqssaDce(r300->radeon.glCtx, compiler.program, &nqssadce);
if (RADEON_DEBUG & DEBUG_PIXEL) {
_mesa_printf("Compiler: after NqSSA-DCE:\n");
_mesa_print_program(compiler.program);
}
fp->translated = r500FragmentProgramEmit(&compiler);
/* Subtle: Rescue any parameters that have been added during transformations */
_mesa_free_parameter_list(fp->mesa_program.Base.Parameters);
fp->mesa_program.Base.Parameters = compiler.program->Parameters;
compiler.program->Parameters = 0;
_mesa_reference_program(r300->radeon.glCtx, &compiler.program, 0);
r300UpdateStateParameters(r300->radeon.glCtx, _NEW_PROGRAM);
if (RADEON_DEBUG & DEBUG_PIXEL) {
if (fp->translated) {
_mesa_printf("Machine-readable code:\n");
dump_program(&fp->code);
}
}
}
update_params(r300, fp);
}
static char *toswiz(int swiz_val) {
switch(swiz_val) {
case 0: return "R";
case 1: return "G";
case 2: return "B";
case 3: return "A";
case 4: return "0";
case 5: return "1/2";
case 6: return "1";
case 7: return "U";
}
return NULL;
}
static char *toop(int op_val)
{
char *str = NULL;
switch (op_val) {
case 0: str = "MAD"; break;
case 1: str = "DP3"; break;
case 2: str = "DP4"; break;
case 3: str = "D2A"; break;
case 4: str = "MIN"; break;
case 5: str = "MAX"; break;
case 6: str = "Reserved"; break;
case 7: str = "CND"; break;
case 8: str = "CMP"; break;
case 9: str = "FRC"; break;
case 10: str = "SOP"; break;
case 11: str = "MDH"; break;
case 12: str = "MDV"; break;
}
return str;
}
static char *to_alpha_op(int op_val)
{
char *str = NULL;
switch (op_val) {
case 0: str = "MAD"; break;
case 1: str = "DP"; break;
case 2: str = "MIN"; break;
case 3: str = "MAX"; break;
case 4: str = "Reserved"; break;
case 5: str = "CND"; break;
case 6: str = "CMP"; break;
case 7: str = "FRC"; break;
case 8: str = "EX2"; break;
case 9: str = "LN2"; break;
case 10: str = "RCP"; break;
case 11: str = "RSQ"; break;
case 12: str = "SIN"; break;
case 13: str = "COS"; break;
case 14: str = "MDH"; break;
case 15: str = "MDV"; break;
}
return str;
}
static char *to_mask(int val)
{
char *str = NULL;
switch(val) {
case 0: str = "NONE"; break;
case 1: str = "R"; break;
case 2: str = "G"; break;
case 3: str = "RG"; break;
case 4: str = "B"; break;
case 5: str = "RB"; break;
case 6: str = "GB"; break;
case 7: str = "RGB"; break;
case 8: str = "A"; break;
case 9: str = "AR"; break;
case 10: str = "AG"; break;
case 11: str = "ARG"; break;
case 12: str = "AB"; break;
case 13: str = "ARB"; break;
case 14: str = "AGB"; break;
case 15: str = "ARGB"; break;
}
return str;
}
static char *to_texop(int val)
{
switch(val) {
case 0: return "NOP";
case 1: return "LD";
case 2: return "TEXKILL";
case 3: return "PROJ";
case 4: return "LODBIAS";
case 5: return "LOD";
case 6: return "DXDY";
}
return NULL;
}
static void dump_program(struct r500_fragment_program_code *code)
{
fprintf(stderr, "R500 Fragment Program:\n--------\n");
int n;
uint32_t inst;
uint32_t inst0;
char *str = NULL;
if (code->const_nr) {
fprintf(stderr, "--------\nConstants:\n");
for (n = 0; n < code->const_nr; n++) {
fprintf(stderr, "Constant %d: %i[%i]\n", n,
code->constant[n].File, code->constant[n].Index);
}
fprintf(stderr, "--------\n");
}
for (n = 0; n < code->inst_end+1; n++) {
inst0 = inst = code->inst[n].inst0;
fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
switch(inst & 0x3) {
case R500_INST_TYPE_ALU: str = "ALU"; break;
case R500_INST_TYPE_OUT: str = "OUT"; break;
case R500_INST_TYPE_FC: str = "FC"; break;
case R500_INST_TYPE_TEX: str = "TEX"; break;
};
fprintf(stderr,"%s %s %s %s %s ", str,
inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
inst & R500_INST_LAST ? "LAST" : "",
inst & R500_INST_NOP ? "NOP" : "",
inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
to_mask((inst >> 15) & 0xf));
switch(inst0 & 0x3) {
case 0:
case 1:
fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
inst = code->inst[n].inst1;
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));
fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
inst = code->inst[n].inst2;
fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
(inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
(inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
(inst >> 30));
fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
inst = code->inst[n].inst3;
fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d\n",
(inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
(inst >> 11) & 0x3,
(inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
(inst >> 24) & 0x3);
fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
inst = code->inst[n].inst4;
fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d w:%d\n", to_alpha_op(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
(inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
(inst >> 31) & 0x1);
fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
inst = code->inst[n].inst5;
fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
(inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
(inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
(inst >> 23) & 0x3,
(inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
break;
case 2:
break;
case 3:
inst = code->inst[n].inst1;
fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
(inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
inst = code->inst[n].inst2;
fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
inst & 127, inst & (1<<7) ? "(rel)" : "",
toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
(inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
break;
}
fprintf(stderr,"\n");
}
}

View File

@ -0,0 +1,62 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/*
* Authors:
* Ben Skeggs <darktama@iinet.net.au>
* Jerome Glisse <j.glisse@gmail.com>
*/
#ifndef __R700_FRAGPROG_H_
#define __R700_FRAGPROG_H_
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/prog_parameter.h"
#include "shader/prog_print.h"
#include "shader/program.h"
#include "shader/prog_instruction.h"
#include "r600_context.h"
#include "r600_state.h"
#include "radeon_program.h"
struct r500_fragment_program;
extern void r500TranslateFragmentShader(r300ContextPtr r300,
struct r500_fragment_program *fp);
struct r500_fragment_program_compiler {
r300ContextPtr r300;
struct r500_fragment_program *fp;
struct r500_fragment_program_code *code;
struct gl_program *program;
};
extern GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler);
#endif

View File

@ -0,0 +1,327 @@
/*
* Copyright (C) 2005 Ben Skeggs.
*
* Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
* Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* \file
*
* \author Ben Skeggs <darktama@iinet.net.au>
*
* \author Jerome Glisse <j.glisse@gmail.com>
*
* \author Corbin Simpson <MostAwesomeDude@gmail.com>
*
* \todo Depth write, WPOS/FOGC inputs
*
* \todo FogOption
*
*/
#include "r700_fragprog.h"
#include "radeon_program_pair.h"
#define PROG_CODE \
struct r500_fragment_program_compiler *c = (struct r500_fragment_program_compiler*)data; \
struct r500_fragment_program_code *code = c->code
#define error(fmt, args...) do { \
fprintf(stderr, "%s::%s(): " fmt "\n", \
__FILE__, __FUNCTION__, ##args); \
} while(0)
/**
* Callback to register hardware constants.
*/
static GLboolean emit_const(void *data, GLuint file, GLuint idx, GLuint *hwindex)
{
PROG_CODE;
for (*hwindex = 0; *hwindex < code->const_nr; ++*hwindex) {
if (code->constant[*hwindex].File == file &&
code->constant[*hwindex].Index == idx)
break;
}
if (*hwindex >= code->const_nr) {
if (*hwindex >= PFS_NUM_CONST_REGS) {
error("Out of hw constants!\n");
return GL_FALSE;
}
code->const_nr++;
code->constant[*hwindex].File = file;
code->constant[*hwindex].Index = idx;
}
return GL_TRUE;
}
static GLuint translate_rgb_op(GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
case OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
case OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
case OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
case OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
case OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
default:
error("translate_rgb_op(%d): unknown opcode\n", opcode);
/* fall through */
case OPCODE_NOP:
/* fall through */
case OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
case OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
case OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
case OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
}
}
static GLuint translate_alpha_op(GLuint opcode)
{
switch(opcode) {
case OPCODE_CMP: return R500_ALPHA_OP_CMP;
case OPCODE_COS: return R500_ALPHA_OP_COS;
case OPCODE_DDX: return R500_ALPHA_OP_MDH;
case OPCODE_DDY: return R500_ALPHA_OP_MDV;
case OPCODE_DP3: return R500_ALPHA_OP_DP;
case OPCODE_DP4: return R500_ALPHA_OP_DP;
case OPCODE_EX2: return R500_ALPHA_OP_EX2;
case OPCODE_FRC: return R500_ALPHA_OP_FRC;
case OPCODE_LG2: return R500_ALPHA_OP_LN2;
default:
error("translate_alpha_op(%d): unknown opcode\n", opcode);
/* fall through */
case OPCODE_NOP:
/* fall through */
case OPCODE_MAD: return R500_ALPHA_OP_MAD;
case OPCODE_MAX: return R500_ALPHA_OP_MAX;
case OPCODE_MIN: return R500_ALPHA_OP_MIN;
case OPCODE_RCP: return R500_ALPHA_OP_RCP;
case OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
case OPCODE_SIN: return R500_ALPHA_OP_SIN;
}
}
static GLuint fix_hw_swizzle(GLuint swz)
{
if (swz == 5) swz = 6;
if (swz == SWIZZLE_NIL) swz = 4;
return swz;
}
static GLuint translate_arg_rgb(struct radeon_pair_instruction *inst, int arg)
{
GLuint t = inst->RGB.Arg[arg].Source;
int comp;
t |= inst->RGB.Arg[arg].Negate << 11;
t |= inst->RGB.Arg[arg].Abs << 12;
for(comp = 0; comp < 3; ++comp)
t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
return t;
}
static GLuint translate_arg_alpha(struct radeon_pair_instruction *inst, int i)
{
GLuint t = inst->Alpha.Arg[i].Source;
t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
t |= inst->Alpha.Arg[i].Negate << 5;
t |= inst->Alpha.Arg[i].Abs << 6;
return t;
}
static void use_temporary(struct r500_fragment_program_code* code, GLuint index)
{
if (index > code->max_temp_idx)
code->max_temp_idx = index;
}
static GLuint use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
{
if (!src.Constant)
use_temporary(code, src.Index);
return src.Index | src.Constant << 8;
}
/**
* Emit a paired ALU instruction.
*/
static GLboolean emit_paired(void *data, struct radeon_pair_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_alu: Too many instructions");
return GL_FALSE;
}
int ip = ++code->inst_end;
code->inst[ip].inst5 = translate_rgb_op(inst->RGB.Opcode);
code->inst[ip].inst4 = translate_alpha_op(inst->Alpha.Opcode);
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask)
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
else
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
if (inst->Alpha.DepthWriteMask) {
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
c->fp->writes_depth = GL_TRUE;
}
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
use_temporary(code, inst->Alpha.DestIndex);
use_temporary(code, inst->RGB.DestIndex);
if (inst->RGB.Saturate)
code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
if (inst->Alpha.Saturate)
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
return GL_TRUE;
}
static GLuint translate_strq_swizzle(struct prog_src_register src)
{
GLuint swiz = 0;
int i;
for (i = 0; i < 4; i++)
swiz |= (GET_SWZ(src.Swizzle, i) & 0x3) << i*2;
return swiz;
}
/**
* Emit a single TEX instruction
*/
static GLboolean emit_tex(void *data, struct prog_instruction *inst)
{
PROG_CODE;
if (code->inst_end >= 511) {
error("emit_tex: Too many instructions");
return GL_FALSE;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_TEX
| (inst->DstReg.WriteMask << 11)
| R500_INST_TEX_SEM_WAIT;
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
| R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
if (inst->TexSrcTarget == TEXTURE_RECT_INDEX)
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
switch (inst->Opcode) {
case OPCODE_KIL:
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
break;
case OPCODE_TEX:
code->inst[ip].inst1 |= R500_TEX_INST_LD;
break;
case OPCODE_TXB:
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
break;
case OPCODE_TXP:
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
break;
default:
error("emit_tex can't handle opcode %x\n", inst->Opcode);
}
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
| (translate_strq_swizzle(inst->SrcReg[0]) << 8)
| R500_TEX_DST_ADDR(inst->DstReg.Index)
| R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
| R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
return GL_TRUE;
}
static const struct radeon_pair_handler pair_handler = {
.EmitConst = emit_const,
.EmitPaired = emit_paired,
.EmitTex = emit_tex,
.MaxHwTemps = 128
};
GLboolean r500FragmentProgramEmit(struct r500_fragment_program_compiler *compiler)
{
struct r500_fragment_program_code *code = compiler->code;
_mesa_bzero(code, sizeof(*code));
code->max_temp_idx = 1;
code->inst_offset = 0;
code->inst_end = -1;
if (!radeonPairProgram(compiler->r300->radeon.glCtx, compiler->program, &pair_handler, compiler))
return GL_FALSE;
if ((code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
/* This may happen when dead-code elimination is disabled or
* when most of the fragment program logic is leading to a KIL */
if (code->inst_end >= 511) {
error("Introducing fake OUT: Too many instructions");
return GL_FALSE;
}
int ip = ++code->inst_end;
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
return GL_TRUE;
}

View File

@ -0,0 +1,76 @@
/**************************************************************************
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.
All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
**************************************************************************/
/*
* Authors:
* Gareth Hughes <gareth@valinux.com>
* Keith Whitwell <keith@tungstengraphics.com>
* Kevin E. Martin <martin@valinux.com>
* Nicolai Haehnle <prefect_@gmx.net>
*/
#ifndef __RADEON_CONTEXT_H__
#define __RADEON_CONTEXT_H__
#include "main/mtypes.h"
#include "main/colormac.h"
#include "radeon_screen.h"
#include "drm.h"
#include "dri_util.h"
#include "radeon_screen.h"
#if R200_MERGED
extern void radeonFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
#define FALLBACK( radeon, bit, mode ) do { \
if ( 0 ) fprintf( stderr, "FALLBACK in %s: #%d=%d\n", \
__FUNCTION__, bit, mode ); \
radeonFallback( (radeon)->glCtx, bit, mode ); \
} while (0)
#else
#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__);
#endif
/* TCL fallbacks */
extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
#if R200_MERGED
#define TCL_FALLBACK( ctx, bit, mode ) radeonTclFallback( ctx, bit, mode )
#else
#define TCL_FALLBACK( ctx, bit, mode ) ;
#endif
#endif /* __RADEON_CONTEXT_H__ */

View File

@ -0,0 +1,284 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* @file
*
* "Not-quite SSA" and Dead-Code Elimination.
*
* @note This code uses SWIZZLE_NIL in a source register to indicate that
* the corresponding component is ignored by the corresponding instruction.
*/
#include "radeon_nqssadce.h"
/**
* Return the @ref register_state for the given register (or 0 for untracked
* registers, i.e. constants).
*/
static struct register_state *get_reg_state(struct nqssadce_state* s, GLuint file, GLuint index)
{
switch(file) {
case PROGRAM_TEMPORARY: return &s->Temps[index];
case PROGRAM_OUTPUT: return &s->Outputs[index];
default: return 0;
}
}
/**
* Left multiplication of a register with a swizzle
*
* @note Works correctly only for X, Y, Z, W swizzles, not for constant swizzles.
*/
static struct prog_src_register lmul_swizzle(GLuint swizzle, struct prog_src_register srcreg)
{
struct prog_src_register tmp = srcreg;
int i;
tmp.Swizzle = 0;
tmp.NegateBase = 0;
for(i = 0; i < 4; ++i) {
GLuint swz = GET_SWZ(swizzle, i);
if (swz < 4) {
tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
tmp.NegateBase |= GET_BIT(srcreg.NegateBase, swz) << i;
} else {
tmp.Swizzle |= swz << (i*3);
}
}
return tmp;
}
static struct prog_instruction* track_used_srcreg(struct nqssadce_state* s,
struct prog_instruction *inst, GLint src, GLuint sourced)
{
int i;
GLuint deswz_source = 0;
for(i = 0; i < 4; ++i) {
if (GET_BIT(sourced, i)) {
GLuint swz = GET_SWZ(inst->SrcReg[src].Swizzle, i);
deswz_source |= 1 << swz;
} else {
inst->SrcReg[src].Swizzle &= ~(7 << (3*i));
inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
}
}
if (!s->Descr->IsNativeSwizzle(inst->Opcode, inst->SrcReg[src])) {
struct prog_dst_register dstreg = inst->DstReg;
dstreg.File = PROGRAM_TEMPORARY;
dstreg.Index = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
dstreg.WriteMask = sourced;
s->Descr->BuildSwizzle(s, dstreg, inst->SrcReg[src]);
inst = s->Program->Instructions + s->IP;
inst->SrcReg[src].File = PROGRAM_TEMPORARY;
inst->SrcReg[src].Index = dstreg.Index;
inst->SrcReg[src].Swizzle = 0;
inst->SrcReg[src].NegateBase = 0;
inst->SrcReg[src].Abs = 0;
inst->SrcReg[src].NegateAbs = 0;
for(i = 0; i < 4; ++i) {
if (GET_BIT(sourced, i))
inst->SrcReg[src].Swizzle |= i << (3*i);
else
inst->SrcReg[src].Swizzle |= SWIZZLE_NIL << (3*i);
}
deswz_source = sourced;
}
struct register_state *regstate = get_reg_state(s, inst->SrcReg[src].File, inst->SrcReg[src].Index);
if (regstate)
regstate->Sourced |= deswz_source & 0xf;
return inst;
}
static void rewrite_depth_out(struct prog_instruction *inst)
{
if (inst->DstReg.WriteMask & WRITEMASK_Z) {
inst->DstReg.WriteMask = WRITEMASK_W;
} else {
inst->DstReg.WriteMask = 0;
return;
}
switch (inst->Opcode) {
case OPCODE_FRC:
case OPCODE_MOV:
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
break;
case OPCODE_ADD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
break;
case OPCODE_CMP:
case OPCODE_MAD:
inst->SrcReg[0] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[0]);
inst->SrcReg[1] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[1]);
inst->SrcReg[2] = lmul_swizzle(SWIZZLE_ZZZZ, inst->SrcReg[2]);
break;
default:
// Scalar instructions needn't be reswizzled
break;
}
}
static void unalias_srcregs(struct prog_instruction *inst, GLuint oldindex, GLuint newindex)
{
int nsrc = _mesa_num_inst_src_regs(inst->Opcode);
int i;
for(i = 0; i < nsrc; ++i)
if (inst->SrcReg[i].File == PROGRAM_TEMPORARY && inst->SrcReg[i].Index == oldindex)
inst->SrcReg[i].Index = newindex;
}
static void unalias_temporary(struct nqssadce_state* s, GLuint oldindex)
{
GLuint newindex = _mesa_find_free_register(s->Program, PROGRAM_TEMPORARY);
int ip;
for(ip = 0; ip < s->IP; ++ip) {
struct prog_instruction* inst = s->Program->Instructions + ip;
if (inst->DstReg.File == PROGRAM_TEMPORARY && inst->DstReg.Index == oldindex)
inst->DstReg.Index = newindex;
unalias_srcregs(inst, oldindex, newindex);
}
unalias_srcregs(s->Program->Instructions + s->IP, oldindex, newindex);
}
/**
* Handle one instruction.
*/
static void process_instruction(struct nqssadce_state* s)
{
struct prog_instruction *inst = s->Program->Instructions + s->IP;
if (inst->Opcode == OPCODE_END)
return;
if (inst->Opcode != OPCODE_KIL) {
if (s->Descr->RewriteDepthOut) {
if (inst->DstReg.File == PROGRAM_OUTPUT && inst->DstReg.Index == FRAG_RESULT_DEPTH)
rewrite_depth_out(inst);
}
struct register_state *regstate = get_reg_state(s, inst->DstReg.File, inst->DstReg.Index);
if (!regstate) {
_mesa_problem(s->Ctx, "NqssaDce: bad destination register (%i[%i])\n",
inst->DstReg.File, inst->DstReg.Index);
return;
}
inst->DstReg.WriteMask &= regstate->Sourced;
regstate->Sourced &= ~inst->DstReg.WriteMask;
if (inst->DstReg.WriteMask == 0) {
_mesa_delete_instructions(s->Program, s->IP, 1);
return;
}
if (inst->DstReg.File == PROGRAM_TEMPORARY && !regstate->Sourced)
unalias_temporary(s, inst->DstReg.Index);
}
/* Attention: Due to swizzle emulation code, the following
* might change the instruction stream under us, so we have
* to be careful with the inst pointer. */
switch (inst->Opcode) {
case OPCODE_DDX:
case OPCODE_DDY:
case OPCODE_FRC:
case OPCODE_MOV:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
break;
case OPCODE_ADD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MUL:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
break;
case OPCODE_CMP:
case OPCODE_MAD:
inst = track_used_srcreg(s, inst, 0, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 1, inst->DstReg.WriteMask);
inst = track_used_srcreg(s, inst, 2, inst->DstReg.WriteMask);
break;
case OPCODE_COS:
case OPCODE_EX2:
case OPCODE_LG2:
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
inst = track_used_srcreg(s, inst, 0, 0x1);
break;
case OPCODE_DP3:
inst = track_used_srcreg(s, inst, 0, 0x7);
inst = track_used_srcreg(s, inst, 1, 0x7);
break;
case OPCODE_DP4:
inst = track_used_srcreg(s, inst, 0, 0xf);
inst = track_used_srcreg(s, inst, 1, 0xf);
break;
case OPCODE_KIL:
case OPCODE_TEX:
case OPCODE_TXB:
case OPCODE_TXP:
inst = track_used_srcreg(s, inst, 0, 0xf);
break;
default:
_mesa_problem(s->Ctx, "NqssaDce: Unknown opcode %d\n", inst->Opcode);
return;
}
}
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr)
{
struct nqssadce_state s;
_mesa_bzero(&s, sizeof(s));
s.Ctx = ctx;
s.Program = p;
s.Descr = descr;
s.Descr->Init(&s);
s.IP = p->NumInstructions;
while(s.IP > 0) {
s.IP--;
process_instruction(&s);
}
}

View File

@ -0,0 +1,96 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_NQSSADCE_H_
#define __RADEON_PROGRAM_NQSSADCE_H_
#include "radeon_program.h"
struct register_state {
/**
* Bitmask indicating which components of the register are sourced
* by later instructions.
*/
GLuint Sourced : 4;
};
/**
* Maintain state such as which registers are used, which registers are
* read from, etc.
*/
struct nqssadce_state {
GLcontext *Ctx;
struct gl_program *Program;
struct radeon_nqssadce_descr *Descr;
/**
* All instructions after this instruction pointer have been dealt with.
*/
int IP;
/**
* Which registers are read by subsequent instructions?
*/
struct register_state Temps[MAX_PROGRAM_TEMPS];
struct register_state Outputs[VERT_RESULT_MAX];
};
/**
* This structure contains a description of the hardware in-so-far as
* it is required for the NqSSA-DCE pass.
*/
struct radeon_nqssadce_descr {
/**
* Fill in which outputs
*/
void (*Init)(struct nqssadce_state *);
/**
* Check whether the given swizzle, absolute and negate combination
* can be implemented natively by the hardware for this opcode.
*/
GLboolean (*IsNativeSwizzle)(GLuint opcode, struct prog_src_register reg);
/**
* Emit (at the current IP) the instruction MOV dst, src;
* The transformation will work recursively on the emitted instruction(s).
*/
void (*BuildSwizzle)(struct nqssadce_state*, struct prog_dst_register dst, struct prog_src_register src);
/**
* Rewrite instructions that write to DEPR.z to write to DEPR.w
* instead (rewriting is done *before* the WriteMask test).
*/
GLboolean RewriteDepthOut;
void *Data;
};
void radeonNqssaDce(GLcontext *ctx, struct gl_program *p, struct radeon_nqssadce_descr* descr);
#endif /* __RADEON_PROGRAM_NQSSADCE_H_ */

View File

@ -0,0 +1,128 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "radeon_program.h"
#include "shader/prog_print.h"
/**
* Transform the given clause in the following way:
* 1. Replace it with an empty clause
* 2. For every instruction in the original clause, try the given
* transformations in order.
* 3. If one of the transformations returns GL_TRUE, assume that it
* has emitted the appropriate instruction(s) into the new clause;
* otherwise, copy the instruction verbatim.
*
* \note The transformation is currently not recursive; in other words,
* instructions emitted by transformations are not transformed.
*
* \note The transform is called 'local' because it can only look at
* one instruction at a time.
*/
void radeonLocalTransform(
GLcontext *Ctx,
struct gl_program *program,
int num_transformations,
struct radeon_program_transformation* transformations)
{
struct radeon_transform_context ctx;
int ip;
ctx.Ctx = Ctx;
ctx.Program = program;
ctx.OldInstructions = program->Instructions;
ctx.OldNumInstructions = program->NumInstructions;
program->Instructions = 0;
program->NumInstructions = 0;
for(ip = 0; ip < ctx.OldNumInstructions; ++ip) {
struct prog_instruction *instr = ctx.OldInstructions + ip;
int i;
for(i = 0; i < num_transformations; ++i) {
struct radeon_program_transformation* t = transformations + i;
if (t->function(&ctx, instr, t->userData))
break;
}
if (i >= num_transformations) {
struct prog_instruction* dest = radeonAppendInstructions(program, 1);
_mesa_copy_instructions(dest, instr, 1);
}
}
_mesa_free_instructions(ctx.OldInstructions, ctx.OldNumInstructions);
}
static void scan_instructions(GLboolean* used, const struct prog_instruction* insts, GLuint count)
{
GLuint i;
for (i = 0; i < count; i++) {
const struct prog_instruction *inst = insts + i;
const GLuint n = _mesa_num_inst_src_regs(inst->Opcode);
GLuint k;
for (k = 0; k < n; k++) {
if (inst->SrcReg[k].File == PROGRAM_TEMPORARY)
used[inst->SrcReg[k].Index] = GL_TRUE;
}
}
}
GLint radeonFindFreeTemporary(struct radeon_transform_context *t)
{
GLboolean used[MAX_PROGRAM_TEMPS];
GLuint i;
_mesa_memset(used, 0, sizeof(used));
scan_instructions(used, t->Program->Instructions, t->Program->NumInstructions);
scan_instructions(used, t->OldInstructions, t->OldNumInstructions);
for (i = 0; i < MAX_PROGRAM_TEMPS; i++) {
if (!used[i])
return i;
}
return -1;
}
/**
* Append the given number of instructions to the program and return a
* pointer to the first new instruction.
*/
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count)
{
int oldnum = program->NumInstructions;
_mesa_insert_instructions(program, oldnum, count);
return program->Instructions + oldnum;
}

View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_H_
#define __RADEON_PROGRAM_H_
#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
#include "shader/program.h"
#include "shader/prog_instruction.h"
enum {
CLAUSE_MIXED = 0,
CLAUSE_ALU,
CLAUSE_TEX
};
enum {
PROGRAM_BUILTIN = PROGRAM_FILE_MAX /**< not a real register, but a special swizzle constant */
};
enum {
OPCODE_REPL_ALPHA = MAX_OPCODE /**< used in paired instructions */
};
#define SWIZZLE_0000 MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO)
#define SWIZZLE_1111 MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE, SWIZZLE_ONE)
/**
* Transformation context that is passed to local transformations.
*
* Care must be taken with some operations during transformation,
* e.g. finding new temporary registers must use @ref radeonFindFreeTemporary
*/
struct radeon_transform_context {
GLcontext *Ctx;
struct gl_program *Program;
struct prog_instruction *OldInstructions;
GLuint OldNumInstructions;
};
/**
* A transformation that can be passed to \ref radeonLocalTransform.
*
* The function will be called once for each instruction.
* It has to either emit the appropriate transformed code for the instruction
* and return GL_TRUE, or return GL_FALSE if it doesn't understand the
* instruction.
*
* The function gets passed the userData as last parameter.
*/
struct radeon_program_transformation {
GLboolean (*function)(
struct radeon_transform_context*,
struct prog_instruction*,
void*);
void *userData;
};
void radeonLocalTransform(
GLcontext* ctx,
struct gl_program *program,
int num_transformations,
struct radeon_program_transformation* transformations);
/**
* Find a usable free temporary register during program transformation
*/
GLint radeonFindFreeTemporary(struct radeon_transform_context *ctx);
struct prog_instruction *radeonAppendInstructions(struct gl_program *program, int count);
#endif

View File

@ -0,0 +1,658 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
/**
* @file
*
* Shareable transformations that transform "special" ALU instructions
* into ALU instructions that are supported by hardware.
*
*/
#include "radeon_program_alu.h"
#include "shader/prog_parameter.h"
static struct prog_instruction *emit1(struct gl_program* p,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg;
return fpi;
}
static struct prog_instruction *emit2(struct gl_program* p,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg0;
fpi->SrcReg[1] = SrcReg1;
return fpi;
}
static struct prog_instruction *emit3(struct gl_program* p,
gl_inst_opcode Opcode, GLuint Saturate, struct prog_dst_register DstReg,
struct prog_src_register SrcReg0, struct prog_src_register SrcReg1,
struct prog_src_register SrcReg2)
{
struct prog_instruction *fpi = radeonAppendInstructions(p, 1);
fpi->Opcode = Opcode;
fpi->SaturateMode = Saturate;
fpi->DstReg = DstReg;
fpi->SrcReg[0] = SrcReg0;
fpi->SrcReg[1] = SrcReg1;
fpi->SrcReg[2] = SrcReg2;
return fpi;
}
static void set_swizzle(struct prog_src_register *SrcReg, int coordinate, int swz)
{
SrcReg->Swizzle &= ~(7 << (3*coordinate));
SrcReg->Swizzle |= swz << (3*coordinate);
}
static void set_negate_base(struct prog_src_register *SrcReg, int coordinate, int negate)
{
SrcReg->NegateBase &= ~(1 << coordinate);
SrcReg->NegateBase |= (negate << coordinate);
}
static struct prog_dst_register dstreg(int file, int index)
{
struct prog_dst_register dst;
dst.File = file;
dst.Index = index;
dst.WriteMask = WRITEMASK_XYZW;
dst.CondMask = COND_TR;
dst.CondSwizzle = SWIZZLE_NOOP;
dst.CondSrc = 0;
dst.pad = 0;
return dst;
}
static struct prog_dst_register dstregtmpmask(int index, int mask)
{
struct prog_dst_register dst;
dst.File = PROGRAM_TEMPORARY;
dst.Index = index;
dst.WriteMask = mask;
dst.CondMask = COND_TR;
dst.CondSwizzle = SWIZZLE_NOOP;
dst.CondSrc = 0;
dst.pad = 0;
return dst;
}
static const struct prog_src_register builtin_zero = {
.File = PROGRAM_BUILTIN,
.Index = 0,
.Swizzle = SWIZZLE_0000
};
static const struct prog_src_register builtin_one = {
.File = PROGRAM_BUILTIN,
.Index = 0,
.Swizzle = SWIZZLE_1111
};
static const struct prog_src_register srcreg_undefined = {
.File = PROGRAM_UNDEFINED,
.Index = 0,
.Swizzle = SWIZZLE_NOOP
};
static struct prog_src_register srcreg(int file, int index)
{
struct prog_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
return src;
}
static struct prog_src_register srcregswz(int file, int index, int swz)
{
struct prog_src_register src = srcreg_undefined;
src.File = file;
src.Index = index;
src.Swizzle = swz;
return src;
}
static struct prog_src_register absolute(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
newreg.Abs = 1;
newreg.NegateBase = 0;
newreg.NegateAbs = 0;
return newreg;
}
static struct prog_src_register negate(struct prog_src_register reg)
{
struct prog_src_register newreg = reg;
newreg.NegateAbs = !newreg.NegateAbs;
return newreg;
}
static struct prog_src_register swizzle(struct prog_src_register reg, GLuint x, GLuint y, GLuint z, GLuint w)
{
struct prog_src_register swizzled = reg;
swizzled.Swizzle = MAKE_SWIZZLE4(
x >= 4 ? x : GET_SWZ(reg.Swizzle, x),
y >= 4 ? y : GET_SWZ(reg.Swizzle, y),
z >= 4 ? z : GET_SWZ(reg.Swizzle, z),
w >= 4 ? w : GET_SWZ(reg.Swizzle, w));
return swizzled;
}
static struct prog_src_register scalar(struct prog_src_register reg)
{
return swizzle(reg, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X);
}
static void transform_ABS(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
struct prog_src_register src = inst->SrcReg[0];
src.Abs = 1;
src.NegateBase = 0;
src.NegateAbs = 0;
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, src);
}
static void transform_DPH(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
struct prog_src_register src0 = inst->SrcReg[0];
if (src0.NegateAbs) {
if (src0.Abs) {
int tempreg = radeonFindFreeTemporary(t);
emit1(t->Program, OPCODE_MOV, 0, dstreg(PROGRAM_TEMPORARY, tempreg), src0);
src0 = srcreg(src0.File, src0.Index);
} else {
src0.NegateAbs = 0;
src0.NegateBase ^= NEGATE_XYZW;
}
}
set_swizzle(&src0, 3, SWIZZLE_ONE);
set_negate_base(&src0, 3, 0);
emit2(t->Program, OPCODE_DP4, inst->SaturateMode, inst->DstReg, src0, inst->SrcReg[1]);
}
/**
* [1, src0.y*src1.y, src0.z, src1.w]
* So basically MUL with lotsa swizzling.
*/
static void transform_DST(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
emit2(t->Program, OPCODE_MUL, inst->SaturateMode, inst->DstReg,
swizzle(inst->SrcReg[0], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE),
swizzle(inst->SrcReg[1], SWIZZLE_ONE, SWIZZLE_Y, SWIZZLE_ONE, SWIZZLE_W));
}
static void transform_FLR(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit1(t->Program, OPCODE_FRC, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0]);
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg,
inst->SrcReg[0], negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
}
/**
* Definition of LIT (from ARB_fragment_program):
*
* tmp = VectorLoad(op0);
* if (tmp.x < 0) tmp.x = 0;
* if (tmp.y < 0) tmp.y = 0;
* if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
* else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
* result.x = 1.0;
* result.y = tmp.x;
* result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
* result.w = 1.0;
*
* The longest path of computation is the one leading to result.z,
* consisting of 5 operations. This implementation of LIT takes
* 5 slots, if the subsequent optimization passes are clever enough
* to pair instructions correctly.
*/
static void transform_LIT(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
static const GLfloat LitConst[4] = { -127.999999 };
GLuint constant;
GLuint constant_swizzle;
GLuint temp;
int needTemporary = 0;
struct prog_src_register srctemp;
constant = _mesa_add_unnamed_constant(t->Program->Parameters, LitConst, 1, &constant_swizzle);
if (inst->DstReg.WriteMask != WRITEMASK_XYZW) {
needTemporary = 1;
} else if (inst->DstReg.File != PROGRAM_TEMPORARY) {
// LIT is typically followed by DP3/DP4, so there's no point
// in creating special code for this case
needTemporary = 1;
}
if (needTemporary) {
temp = radeonFindFreeTemporary(t);
} else {
temp = inst->DstReg.Index;
}
srctemp = srcreg(PROGRAM_TEMPORARY, temp);
// tmp.x = max(0.0, Src.x);
// tmp.y = max(0.0, Src.y);
// tmp.w = clamp(Src.z, -128+eps, 128-eps);
emit2(t->Program, OPCODE_MAX, 0,
dstregtmpmask(temp, WRITEMASK_XYW),
inst->SrcReg[0],
swizzle(srcreg(PROGRAM_CONSTANT, constant),
SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO, constant_swizzle&3));
emit2(t->Program, OPCODE_MIN, 0,
dstregtmpmask(temp, WRITEMASK_Z),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle)));
// tmp.w = Pow(tmp.y, tmp.w)
emit1(t->Program, OPCODE_LG2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
emit2(t->Program, OPCODE_MUL, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srctemp, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z));
emit1(t->Program, OPCODE_EX2, 0,
dstregtmpmask(temp, WRITEMASK_W),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
// tmp.z = (tmp.x > 0) ? tmp.w : 0.0
emit3(t->Program, OPCODE_CMP, inst->SaturateMode,
dstregtmpmask(temp, WRITEMASK_Z),
negate(swizzle(srctemp, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srctemp, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
builtin_zero);
// tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0
emit1(t->Program, OPCODE_MOV, inst->SaturateMode,
dstregtmpmask(temp, WRITEMASK_XYW),
swizzle(srctemp, SWIZZLE_ONE, SWIZZLE_X, SWIZZLE_ONE, SWIZZLE_ONE));
if (needTemporary)
emit1(t->Program, OPCODE_MOV, 0, inst->DstReg, srctemp);
}
static void transform_LRP(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_ADD, 0,
dstreg(PROGRAM_TEMPORARY, tempreg),
inst->SrcReg[1], negate(inst->SrcReg[2]));
emit3(t->Program, OPCODE_MAD, inst->SaturateMode,
inst->DstReg,
inst->SrcReg[0], srcreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[2]);
}
static void transform_POW(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
struct prog_dst_register tempdst = dstreg(PROGRAM_TEMPORARY, tempreg);
struct prog_src_register tempsrc = srcreg(PROGRAM_TEMPORARY, tempreg);
tempdst.WriteMask = WRITEMASK_W;
tempsrc.Swizzle = SWIZZLE_WWWW;
emit1(t->Program, OPCODE_LG2, 0, tempdst, scalar(inst->SrcReg[0]));
emit2(t->Program, OPCODE_MUL, 0, tempdst, tempsrc, scalar(inst->SrcReg[1]));
emit1(t->Program, OPCODE_EX2, inst->SaturateMode, inst->DstReg, tempsrc);
}
static void transform_RSQ(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
emit1(t->Program, OPCODE_RSQ, inst->SaturateMode, inst->DstReg, absolute(inst->SrcReg[0]));
}
static void transform_SGE(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_zero, builtin_one);
}
static void transform_SLT(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_ADD, 0, dstreg(PROGRAM_TEMPORARY, tempreg), inst->SrcReg[0], negate(inst->SrcReg[1]));
emit3(t->Program, OPCODE_CMP, inst->SaturateMode, inst->DstReg,
srcreg(PROGRAM_TEMPORARY, tempreg), builtin_one, builtin_zero);
}
static void transform_SUB(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
emit2(t->Program, OPCODE_ADD, inst->SaturateMode, inst->DstReg, inst->SrcReg[0], negate(inst->SrcReg[1]));
}
static void transform_SWZ(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
emit1(t->Program, OPCODE_MOV, inst->SaturateMode, inst->DstReg, inst->SrcReg[0]);
}
static void transform_XPD(struct radeon_transform_context* t,
struct prog_instruction* inst)
{
int tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_MUL, 0, dstreg(PROGRAM_TEMPORARY, tempreg),
swizzle(inst->SrcReg[0], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, inst->SaturateMode, inst->DstReg,
swizzle(inst->SrcReg[0], SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, SWIZZLE_W),
swizzle(inst->SrcReg[1], SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, SWIZZLE_W),
negate(srcreg(PROGRAM_TEMPORARY, tempreg)));
}
/**
* Can be used as a transformation for @ref radeonClauseLocalTransform,
* no userData necessary.
*
* Eliminates the following ALU instructions:
* ABS, DPH, DST, FLR, LIT, LRP, POW, SGE, SLT, SUB, SWZ, XPD
* using:
* MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
*
* Transforms RSQ to Radeon's native RSQ by explicitly setting
* absolute value.
*
* @note should be applicable to R300 and R500 fragment programs.
*/
GLboolean radeonTransformALU(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
switch(inst->Opcode) {
case OPCODE_ABS: transform_ABS(t, inst); return GL_TRUE;
case OPCODE_DPH: transform_DPH(t, inst); return GL_TRUE;
case OPCODE_DST: transform_DST(t, inst); return GL_TRUE;
case OPCODE_FLR: transform_FLR(t, inst); return GL_TRUE;
case OPCODE_LIT: transform_LIT(t, inst); return GL_TRUE;
case OPCODE_LRP: transform_LRP(t, inst); return GL_TRUE;
case OPCODE_POW: transform_POW(t, inst); return GL_TRUE;
case OPCODE_RSQ: transform_RSQ(t, inst); return GL_TRUE;
case OPCODE_SGE: transform_SGE(t, inst); return GL_TRUE;
case OPCODE_SLT: transform_SLT(t, inst); return GL_TRUE;
case OPCODE_SUB: transform_SUB(t, inst); return GL_TRUE;
case OPCODE_SWZ: transform_SWZ(t, inst); return GL_TRUE;
case OPCODE_XPD: transform_XPD(t, inst); return GL_TRUE;
default:
return GL_FALSE;
}
}
static void sincos_constants(struct radeon_transform_context* t, GLuint *constants)
{
static const GLfloat SinCosConsts[2][4] = {
{
1.273239545, // 4/PI
-0.405284735, // -4/(PI*PI)
3.141592654, // PI
0.2225 // weight
},
{
0.75,
0.5,
0.159154943, // 1/(2*PI)
6.283185307 // 2*PI
}
};
int i;
for(i = 0; i < 2; ++i) {
GLuint swz;
constants[i] = _mesa_add_unnamed_constant(t->Program->Parameters, SinCosConsts[i], 4, &swz);
ASSERT(swz == SWIZZLE_NOOP);
}
}
/**
* Approximate sin(x), where x is clamped to (-pi/2, pi/2).
*
* MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
* MAD tmp.x, tmp.y, |src|, tmp.x
* MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
* MAD dest, tmp.y, weight, tmp.x
*/
static void sin_approx(struct radeon_transform_context* t,
struct prog_dst_register dst, struct prog_src_register src, const GLuint* constants)
{
GLuint tempreg = radeonFindFreeTemporary(t);
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcreg(PROGRAM_CONSTANT, constants[0]));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_X),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
absolute(swizzle(src, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_Y),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
absolute(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)),
negate(swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X)));
emit3(t->Program, OPCODE_MAD, 0, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
}
/**
* Translate the trigonometric functions COS, SIN, and SCS
* using only the basic instructions
* MOV, ADD, MUL, MAD, FRC
*/
GLboolean radeonTransformTrigSimple(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
if (inst->Opcode != OPCODE_COS &&
inst->Opcode != OPCODE_SIN &&
inst->Opcode != OPCODE_SCS)
return GL_FALSE;
GLuint constants[2];
GLuint tempreg = radeonFindFreeTemporary(t);
sincos_constants(t, constants);
if (inst->Opcode == OPCODE_COS) {
// MAD tmp.x, src, 1/(2*PI), 0.75
// FRC tmp.x, tmp.x
// MAD tmp.z, tmp.x, 2*PI, -PI
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else if (inst->Opcode == OPCODE_SIN) {
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_W),
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
sin_approx(t, inst->DstReg,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
constants);
} else {
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg));
emit3(t->Program, OPCODE_MAD, 0, dstregtmpmask(tempreg, WRITEMASK_XY),
srcreg(PROGRAM_TEMPORARY, tempreg),
swizzle(srcreg(PROGRAM_CONSTANT, constants[1]), SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W),
negate(swizzle(srcreg(PROGRAM_CONSTANT, constants[0]), SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z, SWIZZLE_Z)));
struct prog_dst_register dst = inst->DstReg;
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_X;
sin_approx(t, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
constants);
dst.WriteMask = inst->DstReg.WriteMask & WRITEMASK_Y;
sin_approx(t, dst,
swizzle(srcreg(PROGRAM_TEMPORARY, tempreg), SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
constants);
}
return GL_TRUE;
}
/**
* Transform the trigonometric functions COS, SIN, and SCS
* to include pre-scaling by 1/(2*PI) and taking the fractional
* part, so that the input to COS and SIN is always in the range [0,1).
* SCS is replaced by one COS and one SIN instruction.
*
* @warning This transformation implicitly changes the semantics of SIN and COS!
*/
GLboolean radeonTransformTrigScale(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
if (inst->Opcode != OPCODE_COS &&
inst->Opcode != OPCODE_SIN &&
inst->Opcode != OPCODE_SCS)
return GL_FALSE;
static const GLfloat RCP_2PI[] = { 0.15915494309189535 };
GLuint temp;
GLuint constant;
GLuint constant_swizzle;
temp = radeonFindFreeTemporary(t);
constant = _mesa_add_unnamed_constant(t->Program->Parameters, RCP_2PI, 1, &constant_swizzle);
emit2(t->Program, OPCODE_MUL, 0, dstregtmpmask(temp, WRITEMASK_W),
swizzle(inst->SrcReg[0], SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
srcregswz(PROGRAM_CONSTANT, constant, constant_swizzle));
emit1(t->Program, OPCODE_FRC, 0, dstregtmpmask(temp, WRITEMASK_W),
srcreg(PROGRAM_TEMPORARY, temp));
if (inst->Opcode == OPCODE_COS) {
emit1(t->Program, OPCODE_COS, inst->SaturateMode, inst->DstReg,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
} else if (inst->Opcode == OPCODE_SIN) {
emit1(t->Program, OPCODE_SIN, inst->SaturateMode,
inst->DstReg, srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
} else if (inst->Opcode == OPCODE_SCS) {
struct prog_dst_register moddst = inst->DstReg;
if (inst->DstReg.WriteMask & WRITEMASK_X) {
moddst.WriteMask = WRITEMASK_X;
emit1(t->Program, OPCODE_COS, inst->SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
if (inst->DstReg.WriteMask & WRITEMASK_Y) {
moddst.WriteMask = WRITEMASK_Y;
emit1(t->Program, OPCODE_SIN, inst->SaturateMode, moddst,
srcregswz(PROGRAM_TEMPORARY, temp, SWIZZLE_WWWW));
}
}
return GL_TRUE;
}
/**
* Rewrite DDX/DDY instructions to properly work with r5xx shaders.
* The r5xx MDH/MDV instruction provides per-quad partial derivatives.
* It takes the form A*B+C. A and C are set by setting src0. B should be -1.
*
* @warning This explicitly changes the form of DDX and DDY!
*/
GLboolean radeonTransformDeriv(struct radeon_transform_context* t,
struct prog_instruction* inst,
void* unused)
{
if (inst->Opcode != OPCODE_DDX && inst->Opcode != OPCODE_DDY)
return GL_FALSE;
struct prog_src_register B = inst->SrcReg[1];
B.Swizzle = MAKE_SWIZZLE4(SWIZZLE_ONE, SWIZZLE_ONE,
SWIZZLE_ONE, SWIZZLE_ONE);
B.NegateBase = NEGATE_XYZW;
emit2(t->Program, inst->Opcode, inst->SaturateMode, inst->DstReg,
inst->SrcReg[0], B);
return GL_TRUE;
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_ALU_H_
#define __RADEON_PROGRAM_ALU_H_
#include "radeon_program.h"
GLboolean radeonTransformALU(
struct radeon_transform_context *t,
struct prog_instruction*,
void*);
GLboolean radeonTransformTrigSimple(
struct radeon_transform_context *t,
struct prog_instruction*,
void*);
GLboolean radeonTransformTrigScale(
struct radeon_transform_context *t,
struct prog_instruction*,
void*);
GLboolean radeonTransformDeriv(
struct radeon_transform_context *t,
struct prog_instruction*,
void*);
#endif /* __RADEON_PROGRAM_ALU_H_ */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,126 @@
/*
* Copyright (C) 2008 Nicolai Haehnle.
*
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __RADEON_PROGRAM_PAIR_H_
#define __RADEON_PROGRAM_PAIR_H_
#include "radeon_program.h"
/**
* Represents a paired instruction, as found in R300 and R500
* fragment programs.
*/
struct radeon_pair_instruction_source {
GLuint Index:8;
GLuint Constant:1;
GLuint Used:1;
};
struct radeon_pair_instruction_rgb {
GLuint Opcode:8;
GLuint DestIndex:8;
GLuint WriteMask:3;
GLuint OutputWriteMask:3;
GLuint Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
GLuint Source:2;
GLuint Swizzle:9;
GLuint Abs:1;
GLuint Negate:1;
} Arg[3];
};
struct radeon_pair_instruction_alpha {
GLuint Opcode:8;
GLuint DestIndex:8;
GLuint WriteMask:1;
GLuint OutputWriteMask:1;
GLuint DepthWriteMask:1;
GLuint Saturate:1;
struct radeon_pair_instruction_source Src[3];
struct {
GLuint Source:2;
GLuint Swizzle:3;
GLuint Abs:1;
GLuint Negate:1;
} Arg[3];
};
struct radeon_pair_instruction {
struct radeon_pair_instruction_rgb RGB;
struct radeon_pair_instruction_alpha Alpha;
};
/**
*
*/
struct radeon_pair_handler {
/**
* Fill in the proper hardware index for the given constant register.
*
* @return GL_FALSE on error.
*/
GLboolean (*EmitConst)(void*, GLuint file, GLuint index, GLuint *hwindex);
/**
* Write a paired instruction to the hardware.
*
* @return GL_FALSE on error.
*/
GLboolean (*EmitPaired)(void*, struct radeon_pair_instruction*);
/**
* Write a texture instruction to the hardware.
* Register indices have already been rewritten to the allocated
* hardware register numbers.
*
* @return GL_FALSE on error.
*/
GLboolean (*EmitTex)(void*, struct prog_instruction*);
/**
* Called before a block of contiguous, independent texture
* instructions is emitted.
*/
GLboolean (*BeginTexBlock)(void*);
GLuint MaxHwTemps;
};
GLboolean radeonPairProgram(GLcontext *ctx, struct gl_program *program,
const struct radeon_pair_handler*, void *userdata);
void radeonPrintPairInstruction(struct radeon_pair_instruction *inst);
#endif /* __RADEON_PROGRAM_PAIR_H_ */

View File

@ -255,6 +255,124 @@
#define PCI_CHIP_RS740_796E 0x796E
#define PCI_CHIP_RS740_796F 0x796F
#define PCI_CHIP_R600_9400 0x9400
#define PCI_CHIP_R600_9401 0x9401
#define PCI_CHIP_R600_9402 0x9402
#define PCI_CHIP_R600_9403 0x9403
#define PCI_CHIP_R600_9405 0x9405
#define PCI_CHIP_R600_940A 0x940A
#define PCI_CHIP_R600_940B 0x940B
#define PCI_CHIP_R600_940F 0x940F
#define PCI_CHIP_RV610_94C0 0x94C0
#define PCI_CHIP_RV610_94C1 0x94C1
#define PCI_CHIP_RV610_94C3 0x94C3
#define PCI_CHIP_RV610_94C4 0x94C4
#define PCI_CHIP_RV610_94C5 0x94C5
#define PCI_CHIP_RV610_94C6 0x94C6
#define PCI_CHIP_RV610_94C7 0x94C7
#define PCI_CHIP_RV610_94C8 0x94C8
#define PCI_CHIP_RV610_94C9 0x94C9
#define PCI_CHIP_RV610_94CB 0x94CB
#define PCI_CHIP_RV610_94CC 0x94CC
#define PCI_CHIP_RV610_94CD 0x94CD
#define PCI_CHIP_RV630_9580 0x9580
#define PCI_CHIP_RV630_9581 0x9581
#define PCI_CHIP_RV630_9583 0x9583
#define PCI_CHIP_RV630_9586 0x9586
#define PCI_CHIP_RV630_9587 0x9587
#define PCI_CHIP_RV630_9588 0x9588
#define PCI_CHIP_RV630_9589 0x9589
#define PCI_CHIP_RV630_958A 0x958A
#define PCI_CHIP_RV630_958B 0x958B
#define PCI_CHIP_RV630_958C 0x958C
#define PCI_CHIP_RV630_958D 0x958D
#define PCI_CHIP_RV630_958E 0x958E
#define PCI_CHIP_RV630_958F 0x958F
#define PCI_CHIP_RV670_9500 0x9500
#define PCI_CHIP_RV670_9501 0x9501
#define PCI_CHIP_RV670_9504 0x9504
#define PCI_CHIP_RV670_9505 0x9505
#define PCI_CHIP_RV670_9506 0x9506
#define PCI_CHIP_RV670_9507 0x9507
#define PCI_CHIP_RV670_9508 0x9508
#define PCI_CHIP_RV670_9509 0x9509
#define PCI_CHIP_RV670_950F 0x950F
#define PCI_CHIP_RV670_9511 0x9511
#define PCI_CHIP_RV670_9515 0x9515
#define PCI_CHIP_RV670_9517 0x9517
#define PCI_CHIP_RV670_9519 0x9519
#define PCI_CHIP_RV620_95C0 0x95C0
#define PCI_CHIP_RV620_95C2 0x95C2
#define PCI_CHIP_RV620_95C4 0x95C4
#define PCI_CHIP_RV620_95C5 0x95C5
#define PCI_CHIP_RV620_95C6 0x95C6
#define PCI_CHIP_RV620_95C7 0x95C7
#define PCI_CHIP_RV620_95C9 0x95C9
#define PCI_CHIP_RV620_95CC 0x95CC
#define PCI_CHIP_RV620_95CD 0x95CD
#define PCI_CHIP_RV620_95CE 0x95CE
#define PCI_CHIP_RV620_95CF 0x95CF
#define PCI_CHIP_RV635_9590 0x9590
#define PCI_CHIP_RV635_9591 0x9591
#define PCI_CHIP_RV635_9593 0x9593
#define PCI_CHIP_RV635_9595 0x9595
#define PCI_CHIP_RV635_9596 0x9596
#define PCI_CHIP_RV635_9597 0x9597
#define PCI_CHIP_RV635_9598 0x9598
#define PCI_CHIP_RV635_9599 0x9599
#define PCI_CHIP_RV635_959B 0x959B
#define PCI_CHIP_RS780_9611 0x9611
#define PCI_CHIP_RS780_9612 0x9612
#define PCI_CHIP_RS780_9613 0x9613
#define PCI_CHIP_RS780_9614 0x9614
#define PCI_CHIP_RS780_9615 0x9615
#define PCI_CHIP_RS780_9616 0x9616
#define PCI_CHIP_RV770_9440 0x9440
#define PCI_CHIP_RV770_9441 0x9441
#define PCI_CHIP_RV770_9442 0x9442
#define PCI_CHIP_RV770_9444 0x9444
#define PCI_CHIP_RV770_9446 0x9446
#define PCI_CHIP_RV770_944A 0x944A
#define PCI_CHIP_RV770_944B 0x944B
#define PCI_CHIP_RV770_944C 0x944C
#define PCI_CHIP_RV770_944E 0x944E
#define PCI_CHIP_RV770_9450 0x9450
#define PCI_CHIP_RV770_9452 0x9452
#define PCI_CHIP_RV770_9456 0x9456
#define PCI_CHIP_RV770_945A 0x945A
#define PCI_CHIP_RV770_945B 0x945B
#define PCI_CHIP_RV790_9460 0x9460
#define PCI_CHIP_RV790_9462 0x9462
#define PCI_CHIP_RV770_946A 0x946A
#define PCI_CHIP_RV770_946B 0x946B
#define PCI_CHIP_RV770_947A 0x947A
#define PCI_CHIP_RV770_947B 0x947B
#define PCI_CHIP_RV730_9487 0x9487
#define PCI_CHIP_RV730_9489 0x9489
#define PCI_CHIP_RV730_948F 0x948F
#define PCI_CHIP_RV730_9490 0x9490
#define PCI_CHIP_RV730_9491 0x9491
#define PCI_CHIP_RV730_9498 0x9498
#define PCI_CHIP_RV730_949C 0x949C
#define PCI_CHIP_RV730_949E 0x949E
#define PCI_CHIP_RV730_949F 0x949F
#define PCI_CHIP_RV710_9540 0x9540
#define PCI_CHIP_RV710_9541 0x9541
#define PCI_CHIP_RV710_9542 0x9542
#define PCI_CHIP_RV710_954E 0x954E
#define PCI_CHIP_RV710_954F 0x954F
#define PCI_CHIP_RV710_9552 0x9552
#define PCI_CHIP_RV710_9553 0x9553
#define PCI_CHIP_RV710_9555 0x9555
enum {
CHIP_FAMILY_R100,
@ -282,6 +400,16 @@ enum {
CHIP_FAMILY_R580,
CHIP_FAMILY_RV560,
CHIP_FAMILY_RV570,
CHIP_FAMILY_R600,
CHIP_FAMILY_RV610,
CHIP_FAMILY_RV630,
CHIP_FAMILY_RV670,
CHIP_FAMILY_RV620,
CHIP_FAMILY_RV635,
CHIP_FAMILY_RS780,
CHIP_FAMILY_RV770,
CHIP_FAMILY_RV730,
CHIP_FAMILY_RV710,
CHIP_FAMILY_LAST
};
@ -289,6 +417,7 @@ enum {
#define RADEON_CLASS_R100 (0 << 0)
#define RADEON_CLASS_R200 (1 << 0)
#define RADEON_CLASS_R300 (2 << 0)
#define RADEON_CLASS_R600 (3 << 0)
#define RADEON_CLASS_MASK (3 << 0)
#define RADEON_CHIPSET_TCL (1 << 2) /* tcl support - any radeon */

View File

@ -60,7 +60,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
switch (name) {
case GL_VENDOR:
if (IS_R300_CLASS(radeon->radeonScreen))
if (IS_R600_CLASS(radeon->radeonScreen))
return (GLubyte *) "Advanced Micro Devices, Inc.";
else if (IS_R300_CLASS(radeon->radeonScreen))
return (GLubyte *) "DRI R300 Project";
else
return (GLubyte *) "Tungsten Graphics, Inc.";
@ -72,7 +74,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
radeon->radeonScreen->AGPMode;
const char* chipname;
if (IS_R300_CLASS(radeon->radeonScreen))
if (IS_R600_CLASS(radeon->radeonScreen))
chipname = "R600";
else if (IS_R300_CLASS(radeon->radeonScreen))
chipname = "R300";
else if (IS_R200_CLASS(radeon->radeonScreen))
chipname = "R200";
@ -82,7 +86,9 @@ static const GLubyte *radeonGetString(GLcontext * ctx, GLenum name)
offset = driGetRendererString(buffer, chipname, DRIVER_DATE,
agp_mode);
if (IS_R300_CLASS(radeon->radeonScreen)) {
if (IS_R600_CLASS(radeon->radeonScreen)) {
sprintf(&buffer[offset], " TCL");
} else if (IS_R300_CLASS(radeon->radeonScreen)) {
sprintf(&buffer[offset], " %sTCL",
(radeon->radeonScreen->chip_flags & RADEON_CHIPSET_TCL)
? "" : "NO-");

View File

@ -59,6 +59,10 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r300_context.h"
#include "r300_fragprog.h"
#include "r300_tex.h"
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
#include "r600_context.h"
#include "r600_fragprog.h"
#include "r600_tex.h"
#endif
#include "utils.h"
@ -144,7 +148,7 @@ extern const struct dri_extension NV_vp_extension[];
extern const struct dri_extension ATI_fs_extension[];
extern const struct dri_extension point_extensions[];
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
#elif RADEON_COMMON && (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
/* TODO: integrate these into xmlpool.h! */
#define DRI_CONF_MAX_TEXTURE_IMAGE_UNITS(def,min,max) \
@ -393,6 +397,19 @@ static const __DRItexBufferExtension r300TexBufferExtension = {
};
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
static const __DRItexOffsetExtension r300texOffsetExtension = {
{ __DRI_TEX_OFFSET, __DRI_TEX_OFFSET_VERSION },
r300SetTexOffset,
};
static const __DRItexBufferExtension r300TexBufferExtension = {
{ __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION },
r300SetTexBuffer,
r300SetTexBuffer2,
};
#endif
static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
{
screen->chip_flags = 0;
@ -732,6 +749,155 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_R600_9400:
case PCI_CHIP_R600_9401:
case PCI_CHIP_R600_9402:
case PCI_CHIP_R600_9403:
case PCI_CHIP_R600_9405:
case PCI_CHIP_R600_940A:
case PCI_CHIP_R600_940B:
case PCI_CHIP_R600_940F:
screen->chip_family = CHIP_FAMILY_R600;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV610_94C0:
case PCI_CHIP_RV610_94C1:
case PCI_CHIP_RV610_94C3:
case PCI_CHIP_RV610_94C4:
case PCI_CHIP_RV610_94C5:
case PCI_CHIP_RV610_94C6:
case PCI_CHIP_RV610_94C7:
case PCI_CHIP_RV610_94C8:
case PCI_CHIP_RV610_94C9:
case PCI_CHIP_RV610_94CB:
case PCI_CHIP_RV610_94CC:
case PCI_CHIP_RV610_94CD:
screen->chip_family = CHIP_FAMILY_RV610;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV630_9580:
case PCI_CHIP_RV630_9581:
case PCI_CHIP_RV630_9583:
case PCI_CHIP_RV630_9586:
case PCI_CHIP_RV630_9587:
case PCI_CHIP_RV630_9588:
case PCI_CHIP_RV630_9589:
case PCI_CHIP_RV630_958A:
case PCI_CHIP_RV630_958B:
case PCI_CHIP_RV630_958C:
case PCI_CHIP_RV630_958D:
case PCI_CHIP_RV630_958E:
case PCI_CHIP_RV630_958F:
screen->chip_family = CHIP_FAMILY_RV630;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV670_9500:
case PCI_CHIP_RV670_9501:
case PCI_CHIP_RV670_9504:
case PCI_CHIP_RV670_9505:
case PCI_CHIP_RV670_9506:
case PCI_CHIP_RV670_9507:
case PCI_CHIP_RV670_9508:
case PCI_CHIP_RV670_9509:
case PCI_CHIP_RV670_950F:
case PCI_CHIP_RV670_9511:
case PCI_CHIP_RV670_9515:
case PCI_CHIP_RV670_9517:
case PCI_CHIP_RV670_9519:
screen->chip_family = CHIP_FAMILY_RV670;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV620_95C0:
case PCI_CHIP_RV620_95C2:
case PCI_CHIP_RV620_95C4:
case PCI_CHIP_RV620_95C5:
case PCI_CHIP_RV620_95C6:
case PCI_CHIP_RV620_95C7:
case PCI_CHIP_RV620_95C9:
case PCI_CHIP_RV620_95CC:
case PCI_CHIP_RV620_95CD:
case PCI_CHIP_RV620_95CE:
case PCI_CHIP_RV620_95CF:
screen->chip_family = CHIP_FAMILY_RV620;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV635_9590:
case PCI_CHIP_RV635_9591:
case PCI_CHIP_RV635_9593:
case PCI_CHIP_RV635_9595:
case PCI_CHIP_RV635_9596:
case PCI_CHIP_RV635_9597:
case PCI_CHIP_RV635_9598:
case PCI_CHIP_RV635_9599:
case PCI_CHIP_RV635_959B:
screen->chip_family = CHIP_FAMILY_RV635;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RS780_9611:
case PCI_CHIP_RS780_9612:
case PCI_CHIP_RS780_9613:
case PCI_CHIP_RS780_9614:
case PCI_CHIP_RS780_9615:
case PCI_CHIP_RS780_9616:
screen->chip_family = CHIP_FAMILY_RS780;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV770_9440:
case PCI_CHIP_RV770_9441:
case PCI_CHIP_RV770_9442:
case PCI_CHIP_RV770_9444:
case PCI_CHIP_RV770_9446:
case PCI_CHIP_RV770_944A:
case PCI_CHIP_RV770_944B:
case PCI_CHIP_RV770_944C:
case PCI_CHIP_RV770_944E:
case PCI_CHIP_RV770_9450:
case PCI_CHIP_RV770_9452:
case PCI_CHIP_RV770_9456:
case PCI_CHIP_RV770_945A:
case PCI_CHIP_RV770_945B:
case PCI_CHIP_RV790_9460:
case PCI_CHIP_RV790_9462:
case PCI_CHIP_RV770_946A:
case PCI_CHIP_RV770_946B:
case PCI_CHIP_RV770_947A:
case PCI_CHIP_RV770_947B:
screen->chip_family = CHIP_FAMILY_RV770;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV730_9487:
case PCI_CHIP_RV730_9489:
case PCI_CHIP_RV730_948F:
case PCI_CHIP_RV730_9490:
case PCI_CHIP_RV730_9491:
case PCI_CHIP_RV730_9498:
case PCI_CHIP_RV730_949C:
case PCI_CHIP_RV730_949E:
case PCI_CHIP_RV730_949F:
screen->chip_family = CHIP_FAMILY_RV730;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
case PCI_CHIP_RV710_9540:
case PCI_CHIP_RV710_9541:
case PCI_CHIP_RV710_9542:
case PCI_CHIP_RV710_954E:
case PCI_CHIP_RV710_954F:
case PCI_CHIP_RV710_9552:
case PCI_CHIP_RV710_9553:
case PCI_CHIP_RV710_9555:
screen->chip_family = CHIP_FAMILY_RV710;
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
device_id);
@ -901,14 +1067,16 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
}
if (getenv("R300_NO_TCL"))
screen->chip_flags &= ~RADEON_CHIPSET_TCL;
screen->chip_flags &= ~RADEON_CHIPSET_TCL;
if (screen->chip_family <= CHIP_FAMILY_RS200)
screen->chip_flags |= RADEON_CLASS_R100;
screen->chip_flags |= RADEON_CLASS_R100;
else if (screen->chip_family <= CHIP_FAMILY_RV280)
screen->chip_flags |= RADEON_CLASS_R200;
screen->chip_flags |= RADEON_CLASS_R200;
else if (screen->chip_family <= CHIP_FAMILY_RV570)
screen->chip_flags |= RADEON_CLASS_R300;
else
screen->chip_flags |= RADEON_CLASS_R300;
screen->chip_flags |= RADEON_CLASS_R600;
screen->cpp = dri_priv->bpp / 8;
screen->AGPMode = dri_priv->AGPMode;
@ -926,7 +1094,7 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
screen->fbLocation = (temp & 0xffff) << 16;
}
if (screen->chip_family >= CHIP_FAMILY_R300) {
if (IS_R300_CLASS(screen)) {
ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
@ -1031,6 +1199,10 @@ radeonCreateScreen( __DRIscreenPrivate *sPriv )
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
screen->extensions[i++] = &r300texOffsetExtension.base;
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
screen->extensions[i++] = &r300texOffsetExtension.base;
#endif
}
screen->extensions[i++] = NULL;
@ -1095,7 +1267,19 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv)
if (ret == -1)
return NULL;
if (screen->chip_family >= CHIP_FAMILY_R300) {
if (getenv("R300_NO_TCL"))
screen->chip_flags &= ~RADEON_CHIPSET_TCL;
if (screen->chip_family <= CHIP_FAMILY_RS200)
screen->chip_flags |= RADEON_CLASS_R100;
else if (screen->chip_family <= CHIP_FAMILY_RV280)
screen->chip_flags |= RADEON_CLASS_R200;
else if (screen->chip_family <= CHIP_FAMILY_RV570)
screen->chip_flags |= RADEON_CLASS_R300;
else
screen->chip_flags |= RADEON_CLASS_R600;
if (IS_R300_CLASS(screen)) {
ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
fprintf(stderr, "Unable to get num_pipes, need newer drm\n");
@ -1124,16 +1308,6 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv)
}
}
if (screen->chip_family <= CHIP_FAMILY_RS200)
screen->chip_flags |= RADEON_CLASS_R100;
else if (screen->chip_family <= CHIP_FAMILY_RV280)
screen->chip_flags |= RADEON_CLASS_R200;
else
screen->chip_flags |= RADEON_CLASS_R300;
if (getenv("R300_NO_TCL"))
screen->chip_flags &= ~RADEON_CHIPSET_TCL;
i = 0;
screen->extensions[i++] = &driCopySubBufferExtension.base;
screen->extensions[i++] = &driFrameTrackingExtension.base;
@ -1159,6 +1333,10 @@ radeonCreateScreen2(__DRIscreenPrivate *sPriv)
screen->extensions[i++] = &r300TexBufferExtension.base;
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
screen->extensions[i++] = &r300TexBufferExtension.base;
#endif
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
@ -1352,6 +1530,11 @@ static GLboolean radeonCreateContext(const __GLcontextModes * glVisual,
{
__DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv;
radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
if (IS_R600_CLASS(screen))
return r300CreateContext(glVisual, driContextPriv, sharedContextPriv);
#endif
#if RADEON_COMMON && defined(RADEON_COMMON_FOR_R300)
if (IS_R300_CLASS(screen))
return r300CreateContext(glVisual, driContextPriv, sharedContextPriv);
@ -1394,6 +1577,11 @@ radeonInitScreen(__DRIscreenPrivate *psp)
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
#elif RADEON_COMMON && defined(RADEON_COMMON_FOR_R600)
static const char *driver_name = "R600";
static const __DRIutilversion2 ddx_expected = { 4, 5, 0, 0 };
static const __DRIversion dri_expected = { 4, 0, 0 };
static const __DRIversion drm_expected = { 1, 24, 0 };
#endif
RADEONDRIPtr dri_priv = (RADEONDRIPtr) psp->pDevPriv;
@ -1421,7 +1609,7 @@ radeonInitScreen(__DRIscreenPrivate *psp)
driInitSingleExtension( NULL, NV_vp_extension );
driInitSingleExtension( NULL, ATI_fs_extension );
driInitExtensions( NULL, point_extensions, GL_FALSE );
#elif defined(RADEON_COMMON_FOR_R300)
#elif (defined(RADEON_COMMON_FOR_R300) || defined(RADEON_COMMON_FOR_R600))
driInitSingleExtension( NULL, gl_20_extension );
#endif

View File

@ -117,6 +117,8 @@ typedef struct radeon_screen {
((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R200)
#define IS_R300_CLASS(screen) \
((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R300)
#define IS_R600_CLASS(screen) \
((screen->chip_flags & RADEON_CLASS_MASK) == RADEON_CLASS_R600)
extern void radeonDestroyBuffer(__DRIdrawablePrivate *driDrawPriv);
#endif /* __RADEON_SCREEN_H__ */