ppsspp/GPU/Software/Rasterizer.cpp

// Copyright (c) 2013- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "../../Core/MemMap.h"
#include "../GPUState.h"

#include "Rasterizer.h"

extern u8* fb;
extern u8* depthbuf;

extern u32 clut[4096];

namespace Rasterizer {

// Returns the 2D cross product (v1 - v0) x (v2 - v0), i.e. twice the signed
// area of the triangle v0, v1, v2. The result is zero when the three points
// are collinear and its sign tells on which side of the line v0->v1 the
// point v2 lies. Only the x and y members of the coordinates are used.
static int orient2d(const DrawingCoords& v0, const DrawingCoords& v1, const DrawingCoords& v2)
{
	// Edge vector v0 -> v1
	const int edge_dx = (int)v1.x - (int)v0.x;
	const int edge_dy = (int)v1.y - (int)v0.y;

	// Vector v0 -> v2
	const int point_dx = (int)v2.x - (int)v0.x;
	const int point_dy = (int)v2.y - (int)v0.y;

	return edge_dx * point_dy - edge_dy * point_dx;
}

// Returns the byte offset, relative to the start of the texture data, of the
// byte that holds texel (u, v).
//
// texel_size_bits: size of one texel in bits (callers in this file pass 4, 8, 16 or 32).
// row_pitch_bits:  callers in this file pass the texture buffer width in texels
//                  multiplied by 8; the formulas below divide that factor back out.
// u, v:            texel coordinates (column, row).
//
// If bit 0 of gstate.texmode is clear, texels are laid out linearly, row after row.
// Otherwise the data is addressed in 32-bit tiles which are grouped into blocks of
// 4 tiles horizontally by 8 tiles vertically (presumably the PSP "swizzled" layout).
// For sub-byte texels (4 bit) the caller still has to pick the nibble inside the
// returned byte.
int GetPixelDataOffset(int texel_size_bits, int row_pitch_bits, int u, int v)
{
	// Linear layout: bytes per row times v, plus the byte offset of u within the row.
	if (!(gstate.texmode & 1))
		return v * row_pitch_bits * texel_size_bits / 8 / 8 + u * texel_size_bits / 8;

	const int tile_size_bits = 32;
	const int tiles_in_block_horizontal = 4;
	const int tiles_in_block_vertical = 8;

	const int texels_per_tile = tile_size_bits / texel_size_bits;
	const int tile_u = u / texels_per_tile;

	// Number of 32-bit tiles that make up one row of the texture buffer.
	// TODO: not sure if the *texel_size_bits/8 factor is correct
	const int tiles_per_row = row_pitch_bits * texel_size_bits / 8 / tile_size_bits;

	const int tile_idx =
		// row of the tile inside its block
		(v % tiles_in_block_vertical) * tiles_in_block_horizontal +
		// all tiles of the block rows above this one
		(v / tiles_in_block_vertical) * (tiles_per_row * tiles_in_block_vertical) +
		// column of the tile inside its block
		(tile_u % tiles_in_block_horizontal) +
		// all tiles of the blocks to the left in the same block row
		(tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal * tiles_in_block_vertical);

	// The position inside the tile has to be converted from a texel index to a
	// byte offset. Adding the raw texel index is only right for 8-bit texels:
	// 16-bit texels would land on odd bytes and 4-bit texels would run past the tile.
	return tile_idx * tile_size_bits / 8 + (u % texels_per_tile) * texel_size_bits / 8;
}

// Expands a 16-bit texel with four 4-bit channels into a 32-bit color.
//
// Input layout:  R in bits 0-3, G in bits 4-7, B in bits 8-11, A in bits 12-15.
// Output layout: (R << 24) | (G << 16) | (B << 8) | A, each channel 8 bits wide,
//                the same packing the other Decode* helpers in this file produce.
//
// Every nibble n is widened to (n << 4) | n so that 0x0 maps to 0x00 and 0xF to 0xFF.
u32 DecodeRGBA4444(u16 src)
{
	// Each channel is exactly one nibble. Masking with 0xFF instead of 0xF would
	// let the neighbouring channel's bits leak into the replicated value.
	u8 r = src & 0xF;
	u8 g = (src >> 4) & 0xF;
	u8 b = (src >> 8) & 0xF;
	u8 a = (src >> 12) & 0xF;

	r = (r << 4) | r;
	g = (g << 4) | g;
	b = (b << 4) | b;
	a = (a << 4) | a;

	// Widen before shifting so the red channel never shifts into the sign bit of an int.
	return ((u32)r << 24) | ((u32)g << 16) | ((u32)b << 8) | (u32)a;
}

// Expands a 16-bit texel with three 5-bit color channels and a 1-bit alpha
// into a 32-bit color packed as (R << 24) | (G << 16) | (B << 8) | A.
//
// Input layout: R in bits 0-4, G in bits 5-9, B in bits 10-14, A in bit 15.
// 5-bit channels are widened by repeating their top three bits in the low bits;
// the alpha bit becomes either 0x00 or 0xFF.
u32 DecodeRGBA5551(u16 src)
{
	const u8 red5 = src & 0x1F;
	const u8 green5 = (src >> 5) & 0x1F;
	const u8 blue5 = (src >> 10) & 0x1F;
	const bool opaque = ((src >> 15) & 0x1) != 0;

	const u8 red8 = (red5 << 3) | (red5 >> 2);
	const u8 green8 = (green5 << 3) | (green5 >> 2);
	const u8 blue8 = (blue5 << 3) | (blue5 >> 2);
	const u8 alpha8 = opaque ? 0xff : 0;

	return (static_cast<u32>(red8) << 24) |
	       (static_cast<u32>(green8) << 16) |
	       (static_cast<u32>(blue8) << 8) |
	       static_cast<u32>(alpha8);
}

// Expands a 16-bit texel without alpha (5 bits red, 6 bits green, 5 bits blue)
// into a 32-bit color packed as (R << 24) | (G << 16) | (B << 8) | A.
//
// Input layout: R in bits 0-4, G in bits 5-10, B in bits 11-15.
// Channels are widened by repeating their top bits in the low bits.
// The alpha channel of the result is always 0.
u32 DecodeRGB565(u16 src)
{
	const u8 red5 = src & 0x1F;
	const u8 green6 = (src >> 5) & 0x3F;
	const u8 blue5 = (src >> 11) & 0x1F;

	const u8 red8 = (red5 << 3) | (red5 >> 2);
	const u8 green8 = (green6 << 2) | (green6 >> 4);
	const u8 blue8 = (blue5 << 3) | (blue5 >> 2);
	const u8 alpha8 = 0; // TODO: Might want to use 0xFF here instead?

	return (static_cast<u32>(red8) << 24) |
	       (static_cast<u32>(green8) << 16) |
	       (static_cast<u32>(blue8) << 8) |
	       static_cast<u32>(alpha8);
}

// Repacks a 32-bit texel whose channels are stored as R in bits 0-7, G in
// bits 8-15, B in bits 16-23 and A in bits 24-31 into the layout used by the
// other Decode* helpers: (R << 24) | (G << 16) | (B << 8) | A.
// In effect this reverses the byte order of src.
u32 DecodeRGBA8888(u32 src)
{
	const u32 red = src & 0xFF;
	const u32 green = (src >> 8) & 0xFF;
	const u32 blue = (src >> 16) & 0xFF;
	const u32 alpha = (src >> 24) & 0xFF;

	return (red << 24) | (green << 16) | (blue << 8) | alpha;
}

// Fetches a single texel from the currently configured texture (no filtering)
// and returns it as a 32-bit color in the packing produced by the Decode*
// helpers, or as read from the global clut table for the indexed formats.
//
// level: mipmap level whose address, buffer width and size registers are used.
// s, t:  texture coordinates. In through mode they are taken as texel
//        coordinates directly, otherwise they are scaled by the texture
//        width/height. The conversion to int truncates.
//
// Returns 0 and logs an error for texture formats that are not handled.
//
// No wrapping or clamping of the coordinates is performed, so coordinates
// outside of the texture read outside of the texture data.
u32 SampleNearest(int level, float s, float t)
{
	int texfmt = gstate.texformat & 0xF;

	// The address is split over two registers: the low bits come from texaddr,
	// bits 24-27 are stored in bits 16-19 of texbufwidth.
	u32 texaddr = (gstate.texaddr[level] & 0xFFFFF0) | ((gstate.texbufwidth[level] << 8) & 0x0F000000);
	u8* srcptr = (u8*)Memory::GetPointer(texaddr); // TODO: not sure if this is the right place to load from...?

	// Texture dimensions are stored as power-of-two exponents.
	int width = 1 << (gstate.texsize[level] & 0xf);
	int height = 1 << ((gstate.texsize[level]>>8) & 0xf);

	// Special rules for kernel textures (PPGe), TODO: Verify!
	int texbufwidth = (texaddr < PSP_GetUserMemoryBase()) ? gstate.texbufwidth[level] & 0x1FFF : gstate.texbufwidth[level] & 0x7FF;

	// TODO: Should probably check if textures are aligned properly...

	// TODO: Not sure if that through mode treatment is correct..
	int u = (gstate.isModeThrough()) ? s : s * width; // TODO: -1?
	int v = (gstate.isModeThrough()) ? t : t * height; // TODO: -1?

	// TODO: texcoord wrapping!!

	// TODO: Assert tmap.tmn == 0 (uv texture mapping mode)

	switch (texfmt) {
	case GE_TFMT_4444:
		srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v);
		return DecodeRGBA4444(*(u16*)srcptr);

	case GE_TFMT_5551:
		srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v);
		return DecodeRGBA5551(*(u16*)srcptr);

	case GE_TFMT_5650:
		srcptr += GetPixelDataOffset(16, texbufwidth*8, u, v);
		return DecodeRGB565(*(u16*)srcptr);

	case GE_TFMT_8888:
		srcptr += GetPixelDataOffset(32, texbufwidth*8, u, v);
		return DecodeRGBA8888(*(u32*)srcptr);

	case GE_TFMT_CLUT8:
	{
		srcptr += GetPixelDataOffset(8, texbufwidth*8, u, v);

		u16 index = (((u32)*srcptr) >> gstate.getClutIndexShift()) & 0xFF;
		index &= gstate.getClutIndexMask();
		index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos

		// TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888;
		return clut[index];
	}

	case GE_TFMT_CLUT4:
	{
		srcptr += GetPixelDataOffset(4, texbufwidth*8, u, v);

		// Two texels share one byte: the texel with the even u coordinate is
		// stored in the low nibble, the odd one in the high nibble.
		u8 val = (u%2) ? (*srcptr >> 4) : (*srcptr & 0xF);
		u16 index = (((u32)val) >> gstate.getClutIndexShift()) & 0xFF;
		index &= gstate.getClutIndexMask();
		index = (index & 0xFF) | gstate.getClutIndexStartPos(); // Topmost bit is copied from start pos

		// TODO: Assert that we're using GE_CMODE_32BIT_ABGR8888;
		return clut[index];
	}

	default:
		ERROR_LOG(G3D, "Unsupported texture format: %x", texfmt);
		return 0;
	}
}

// NOTE: These likely aren't endian safe
// Reads the 32-bit color stored for pixel (x, y) in the global framebuffer fb.
// Pixels are addressed as 4 bytes each, with gstate.FrameBufStride() pixels per row.
static inline u32 GetPixelColor(int x, int y)
{
	// TODO: Fix for other pixel formats!
	const u8* const pixel = fb + (4*x + 4*y*gstate.FrameBufStride());
	return *reinterpret_cast<const u32*>(pixel);
}

static inline void SetPixelColor(int x, int y, u32 value)
{
	*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = value;
}

// Reads the 16-bit depth value stored for pixel (x, y) in the global depth buffer.
// Entries are 2 bytes each, with gstate.DepthBufStride() entries per row.
static inline u16 GetPixelDepth(int x, int y)
{
	const u8* const entry = depthbuf + (2*x + 2*y*gstate.DepthBufStride());
	return *reinterpret_cast<const u16*>(entry);
}

static inline void SetPixelDepth(int x, int y, u16 value)
{
	*(u16*)&depthbuf[2*x + 2*y*gstate.DepthBufStride()] = value;
}

// Returns the stencil value of pixel (x, y). The stencil is kept in the
// topmost bit of the pixel's 32-bit framebuffer word; the result is 0xFF
// when that bit is set and 0 otherwise.
static inline u8 GetPixelStencil(int x, int y)
{
	const u32 pixel = *reinterpret_cast<const u32*>(fb + (4*x + 4*y*gstate.FrameBufStride()));
	if ((pixel & 0x80000000) != 0)
		return 0xFF;
	return 0;
}

static inline void SetPixelStencil(int x, int y, u8 value)
{
	*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] = (*(u32*)&fb[4*x + 4*y*gstate.FrameBufStride()] & ~0x80000000) | ((value&0x80)<<24);
}

static inline bool DepthTestPassed(int x, int y, u16 z)
{
	u16 reference_z = GetPixelDepth(x, y);

	if (gstate.isModeClear())
		return true;

	switch (gstate.getDepthTestFunc()) {
	case GE_COMP_NEVER:
		return false;

	case GE_COMP_ALWAYS:
		return true;

	case GE_COMP_EQUAL:
		return (z == reference_z);

	case GE_COMP_NOTEQUAL:
		return (z != reference_z);

	case GE_COMP_LESS:
		return (z < reference_z);

	case GE_COMP_LEQUAL:
		return (z <= reference_z);

	case GE_COMP_GREATER:
		return (z > reference_z);

	case GE_COMP_GEQUAL:
		return (z >= reference_z);

	default:
		return 0;
	}
}

// Classifies the triangle edge line1 -> line2 relative to the remaining
// triangle corner `vertex`.
// Returns true if the edge is horizontal and the vertex has a smaller y than
// the edge (the edge is the flat bottom of the triangle), or if the edge is
// not horizontal and the vertex lies at a smaller x than the point of the
// edge at the vertex's y (the edge is on the right side of the triangle).
bool IsRightSideOrFlatBottomLine(const Vec2<u10>& vertex, const Vec2<u10>& line1, const Vec2<u10>& line2)
{
	// Edge parallel to the x-axis: it is a bottom edge if the vertex is above it.
	if (line1.y == line2.y)
		return vertex.y < line1.y;

	// Otherwise intersect the edge with the horizontal line through the vertex
	// and check whether the vertex is to the left of that intersection.
	return vertex.x < line1.x + (line2.x - line1.x) * (vertex.y - line1.y) / (line2.y - line1.y);
}

void ApplyStencilOp(int op, int x, int y)
{
	u8 old_stencil = GetPixelStencil(x, y); // TODO: Apply mask?
	u8 reference_stencil = gstate.getStencilTestRef(); // TODO: Apply mask?

	switch (op) {
		case GE_STENCILOP_KEEP:
			return;

		case GE_STENCILOP_ZERO:
			SetPixelStencil(x, y, 0);
			return;

		case GE_STENCILOP_REPLACE:
			SetPixelStencil(x, y, reference_stencil);
			break;

		case GE_STENCILOP_INVERT:
			SetPixelStencil(x, y, ~old_stencil);
			break;

		case GE_STENCILOP_INCR:
			// TODO: Does this overflow?
			SetPixelStencil(x, y, old_stencil+1);
			break;

		case GE_STENCILOP_DECR:
			// TODO: Does this underflow?
			SetPixelStencil(x, y, old_stencil-1);
			break;
	}
}

// Draws triangle, vertices specified in counter-clockwise direction (TODO: Make sure this is actually enforced)
//
// Rasterizes the triangle v0, v1, v2 into the global framebuffer (and depth buffer).
// Every pixel of the triangle's bounding box, clipped against the scissor rectangle,
// is tested against the three edges. Pixels that are covered then run through,
// in this order: stencil test, depth test/write, color interpolation, texturing,
// color doubling, secondary color addition, alpha blending, and finally the
// framebuffer write. All render state is read from the global gstate.
void DrawTriangle(const VertexData& v0, const VertexData& v1, const VertexData& v2)
{
	// Bounding box of the three vertices in drawing coordinates.
	int minX = std::min(std::min(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x);
	int minY = std::min(std::min(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y);
	int maxX = std::max(std::max(v0.drawpos.x, v1.drawpos.x), v2.drawpos.x);
	int maxY = std::max(std::max(v0.drawpos.y, v1.drawpos.y), v2.drawpos.y);

	// Shrink the bounding box to the scissor rectangle (both bounds are inclusive below).
	minX = std::max(minX, gstate.getScissorX1());
	maxX = std::min(maxX, gstate.getScissorX2());
	minY = std::max(minY, gstate.getScissorY1());
	maxY = std::min(maxY, gstate.getScissorY2());

	// Edges classified as right side / flat bottom get a bias of -1, so pixels lying
	// exactly on such an edge (edge function == 0) fail the ">= 0" coverage test below.
	// biasN belongs to the edge opposite of vertex N.
	int bias0 = IsRightSideOrFlatBottomLine(v0.drawpos.xy(), v1.drawpos.xy(), v2.drawpos.xy()) ? -1 : 0;
	int bias1 = IsRightSideOrFlatBottomLine(v1.drawpos.xy(), v2.drawpos.xy(), v0.drawpos.xy()) ? -1 : 0;
	int bias2 = IsRightSideOrFlatBottomLine(v2.drawpos.xy(), v0.drawpos.xy(), v1.drawpos.xy()) ? -1 : 0;

	DrawingCoords p(minX, minY, 0);
	for (p.y = minY; p.y <= maxY; ++p.y) {
		for (p.x = minX; p.x <= maxX; ++p.x) {
			// Biased edge functions of p; wN is the (unnormalized) weight of vertex N
			// used for all interpolation below.
			int w0 = orient2d(v1.drawpos, v2.drawpos, p) + bias0;
			int w1 = orient2d(v2.drawpos, v0.drawpos, p) + bias1;
			int w2 = orient2d(v0.drawpos, v1.drawpos, p) + bias2;

			// If p is on or inside all edges, render pixel
			// TODO: Should only render when it's on the left of the right edge
			if (w0 >=0 && w1 >= 0 && w2 >= 0) {
				// All weights zero: skip, the divisions by (w0+w1+w2) below would divide by zero.
				if (w0 == w1 && w1 == w2 && w2 == 0)
					continue;

				// Denominator for the perspective-correct interpolation of s and t:
				// the weights divided by each vertex' clip-space w.
				// TODO: Make sure this is not ridiculously small?
				float den = 1.0f/v0.clippos.w * w0 + 1.0f/v1.clippos.w * w1 + 1.0f/v2.clippos.w * w2;

				// TODO: Depth range test

				// Stencil test (skipped in clear mode): compares the masked stencil of the
				// pixel against the masked reference value.
				if (gstate.isStencilTestEnabled() && !gstate.isModeClear()) {
					// Stays false for comparison functions not listed in the switch.
					bool pass = false;
					u8 stencil = GetPixelStencil(p.x, p.y) & gstate.getStencilTestMask(); // TODO: Magic?
					u8 ref = gstate.getStencilTestRef() & gstate.getStencilTestMask();
					switch (gstate.getStencilTestFunction()) {
						case GE_COMP_NEVER:
							pass = false;
							break;
						case GE_COMP_ALWAYS:
							pass = true;
							break;
						case GE_COMP_EQUAL:
							pass = (stencil == ref);
							break;
						case GE_COMP_NOTEQUAL:
							pass = (stencil != ref);
							break;
						case GE_COMP_LESS:
							pass = (stencil < ref);
							break;
						case GE_COMP_LEQUAL:
							pass = (stencil <= ref);
							break;
						case GE_COMP_GREATER:
							pass = (stencil > ref);
							break;
						case GE_COMP_GEQUAL:
							pass = (stencil >= ref);
							break;
					}

					// Stencil test failed: apply the "stencil fail" operation and drop the pixel.
					if (!pass) {
						ApplyStencilOp(gstate.getStencilOpSFail(), p.x, p.y);
						continue;
					}
				}

				// Depth test. Runs when depth testing is enabled outside of through mode,
				// and always in clear mode.
				// TODO: Is it safe to ignore gstate.isDepthTestEnabled() when clear mode is enabled?
				if ((gstate.isDepthTestEnabled() && !gstate.isModeThrough()) || gstate.isModeClear()) {
					// Depth is interpolated linearly with the integer weights (no perspective correction).
					// TODO: Is that the correct way to interpolate?
					u16 z = (u16)((v0.drawpos.z * w0 + v1.drawpos.z * w1 + v2.drawpos.z * w2) / (w0+w1+w2));

					// TODO: Verify that stencil op indeed needs to be applied here even if stencil testing is disabled
					if (!DepthTestPassed(p.x, p.y, z)) {
						// Depth test failed: apply the "depth fail" stencil operation and drop the pixel.
						ApplyStencilOp(gstate.getStencilOpZFail(), p.x, p.y);
						continue;
					} else {
						ApplyStencilOp(gstate.getStencilOpZPass(), p.x, p.y);
					}

					// Depth is written when depth writes are enabled, or in clear mode
					// when bit 0x40 of gstate.clearmode is set.
					// TODO: Is this condition correct?
					if (gstate.isDepthWriteEnabled() || ((gstate.clearmode&0x40) && gstate.isModeClear()))
						SetPixelDepth(p.x, p.y, z);
				}

				// Perspective-correct texture coordinates: interpolate s/w and t/w, then divide by den.
				float s = (v0.texturecoords.s() * w0 / v0.clippos.w + v1.texturecoords.s() * w1 / v1.clippos.w + v2.texturecoords.s() * w2 / v2.clippos.w) / den;
				float t = (v0.texturecoords.t() * w0 / v0.clippos.w + v1.texturecoords.t() * w1 / v1.clippos.w + v2.texturecoords.t() * w2 / v2.clippos.w) / den;

				// Primary color (rgb and alpha kept separately) and secondary color of the fragment.
				Vec3<int> prim_color_rgb(0, 0, 0);
				int prim_color_a = 0;
				Vec3<int> sec_color(0, 0, 0);
				if ((gstate.shademodel&1) == GE_SHADE_GOURAUD) {
					// Gouraud shading: weighted average of the three vertex colors.
					// NOTE: When not casting color0 and color1 to float vectors, this code suffers from severe overflow issues.
					// Not sure if that should be regarded as a bug or if casting to float is a valid fix.
					// TODO: Is that the correct way to interpolate?
					prim_color_rgb = ((v0.color0.rgb().Cast<float>() * w0 +
									v1.color0.rgb().Cast<float>() * w1 +
									v2.color0.rgb().Cast<float>() * w2) / (w0+w1+w2)).Cast<int>();
					prim_color_a = (int)((v0.color0.a() * w0 + v1.color0.a() * w1 + v2.color0.a() * w2) / (w0+w1+w2));
					sec_color = ((v0.color1.Cast<float>() * w0 +
									v1.color1.Cast<float>() * w1 +
									v2.color1.Cast<float>() * w2) / (w0+w1+w2)).Cast<int>();
				} else {
					// Otherwise the whole triangle uses the colors of the last vertex (v2).
					prim_color_rgb = v2.color0.rgb();
					prim_color_a = v2.color0.a();
					sec_color = v2.color1;
				}

				// Texturing (skipped in clear mode): sample mip level 0 and combine the texel
				// with the primary color according to the texture function.
				// TODO: Also disable if vertex has no texture coordinates?
				if (gstate.isTextureMapEnabled() && !gstate.isModeClear()) {
					Vec4<int> texcolor = Vec4<int>::FromRGBA(/*TextureDecoder::*/SampleNearest(0, s, t));

					// Bit 0x10 of texfunc selects whether the texture's alpha channel takes part.
					bool rgba = (gstate.texfunc & 0x10) != 0;

					// texture function
					switch (gstate.getTextureFunction()) {
					case GE_TEXFUNC_MODULATE:
						// Multiply primary color by the texel.
						prim_color_rgb = prim_color_rgb * texcolor.rgb() / 255;
						prim_color_a = (rgba) ? (prim_color_a * texcolor.a() / 255) : prim_color_a;
						break;

					case GE_TEXFUNC_DECAL:
					{
						// Blend texel over the primary color using the texel's alpha (or fully
						// replace the rgb if the texture alpha is not used). Alpha is unchanged.
						// Note: this local "t" shadows the texture coordinate t declared above.
						int t = (rgba) ? texcolor.a() : 255;
						int invt = (rgba) ? 255 - t : 0;
						prim_color_rgb = (invt * prim_color_rgb + t * texcolor.rgb()) / 255;
						// prim_color_a = prim_color_a;
						break;
					}

					case GE_TEXFUNC_BLEND:
					{
						// Interpolate between primary color and the texture environment color,
						// using the texel rgb as the per-channel blend factor.
						const Vec3<int> const255(255, 255, 255);
						const Vec3<int> texenv(gstate.getTextureEnvColR(), gstate.getTextureEnvColG(), gstate.getTextureEnvColB());
						prim_color_rgb = ((const255 - texcolor.rgb()) * prim_color_rgb + texcolor.rgb() * texenv) / 255;
						prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
						break;
					}

					case GE_TEXFUNC_REPLACE:
						// Texel replaces the primary color.
						prim_color_rgb = texcolor.rgb();
						prim_color_a = (rgba) ? texcolor.a() : prim_color_a;
						break;

					case GE_TEXFUNC_ADD:
						// Add texel to the primary color, saturating at 255.
						prim_color_rgb += texcolor.rgb();
						if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255;
						if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255;
						if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255;
						prim_color_a = prim_color_a * ((rgba) ? texcolor.a() : 255) / 255;
						break;

					default:
						ERROR_LOG(G3D, "Unknown texture function %x", gstate.getTextureFunction());
					}
				}

				if (gstate.isColorDoublingEnabled()) {
					// TODO: Do we need to clamp here?
					prim_color_rgb *= 2;
					sec_color *= 2;
				}

				// Add the secondary color and clamp every channel to [0, 255].
				prim_color_rgb += sec_color;
				if (prim_color_rgb.r() > 255) prim_color_rgb.r() = 255;
				if (prim_color_rgb.g() > 255) prim_color_rgb.g() = 255;
				if (prim_color_rgb.b() > 255) prim_color_rgb.b() = 255;
				if (prim_color_rgb.r() < 0) prim_color_rgb.r() = 0;
				if (prim_color_rgb.g() < 0) prim_color_rgb.g() = 0;
				if (prim_color_rgb.b() < 0) prim_color_rgb.b() = 0;

				// TODO: Fogging

				// Alpha blending (skipped in clear mode): combine the fragment color with the
				// color already in the framebuffer. Only rgb is blended, alpha is left as is.
				if (gstate.isAlphaBlendEnabled() && !gstate.isModeClear()) {
					Vec4<int> dst = Vec4<int>::FromRGBA(GetPixelColor(p.x, p.y));

					// Blend factors for the fragment (srccol) and the framebuffer color (dstcol),
					// in the 0..255 range; they stay 0 for factor modes not listed below.
					Vec3<int> srccol(0, 0, 0);
					Vec3<int> dstcol(0, 0, 0);

					switch (gstate.getBlendFuncA()) {
					case GE_SRCBLEND_DSTCOLOR:
						srccol = dst.rgb();
						break;
					case GE_SRCBLEND_INVDSTCOLOR:
						srccol = Vec3<int>::AssignToAll(255) - dst.rgb();
						break;
					case GE_SRCBLEND_SRCALPHA:
						srccol = Vec3<int>::AssignToAll(prim_color_a);
						break;
					case GE_SRCBLEND_INVSRCALPHA:
						srccol = Vec3<int>::AssignToAll(255 - prim_color_a);
						break;
					case GE_SRCBLEND_DSTALPHA:
						srccol = Vec3<int>::AssignToAll(dst.a());
						break;
					case GE_SRCBLEND_INVDSTALPHA:
						srccol = Vec3<int>::AssignToAll(255 - dst.a());
						break;
					case GE_SRCBLEND_DOUBLESRCALPHA:
						srccol = Vec3<int>::AssignToAll(2 * prim_color_a);
						break;
					case GE_SRCBLEND_DOUBLEINVSRCALPHA:
						srccol = Vec3<int>::AssignToAll(255 - 2 * prim_color_a);
						break;
					case GE_SRCBLEND_DOUBLEDSTALPHA:
						srccol = Vec3<int>::AssignToAll(2 * dst.a());
						break;
					case GE_SRCBLEND_DOUBLEINVDSTALPHA:
						srccol = Vec3<int>::AssignToAll(255 - 2 * dst.a());
						break;
					case GE_SRCBLEND_FIXA:
						srccol = Vec4<int>::FromRGBA(gstate.getFixA()).rgb();
						break;
					}

					switch (gstate.getBlendFuncB()) {
					case GE_DSTBLEND_SRCCOLOR:
						dstcol = prim_color_rgb;
						break;
					case GE_DSTBLEND_INVSRCCOLOR:
						dstcol = Vec3<int>::AssignToAll(255) - prim_color_rgb;
						break;
					case GE_DSTBLEND_SRCALPHA:
						dstcol = Vec3<int>::AssignToAll(prim_color_a);
						break;
					case GE_DSTBLEND_INVSRCALPHA:
						dstcol = Vec3<int>::AssignToAll(255 - prim_color_a);
						break;
					case GE_DSTBLEND_DSTALPHA:
						dstcol = Vec3<int>::AssignToAll(dst.a());
						break;
					case GE_DSTBLEND_INVDSTALPHA:
						dstcol = Vec3<int>::AssignToAll(255 - dst.a());
						break;
					case GE_DSTBLEND_DOUBLESRCALPHA:
						dstcol = Vec3<int>::AssignToAll(2 * prim_color_a);
						break;
					case GE_DSTBLEND_DOUBLEINVSRCALPHA:
						dstcol = Vec3<int>::AssignToAll(255 - 2 * prim_color_a);
						break;
					case GE_DSTBLEND_DOUBLEDSTALPHA:
						dstcol = Vec3<int>::AssignToAll(2 * dst.a());
						break;
					case GE_DSTBLEND_DOUBLEINVDSTALPHA:
						dstcol = Vec3<int>::AssignToAll(255 - 2 * dst.a());
						break;
					case GE_DSTBLEND_FIXB:
						dstcol = Vec4<int>::FromRGBA(gstate.getFixB()).rgb();
						break;
					}

					// Blend equation. The min/max/absdiff modes ignore the factors computed above.
					// NOTE(review): the results are not clamped to [0, 255] in this function;
					// whether ToRGBA() below clamps is not visible from this file - confirm.
					switch (gstate.getBlendEq()) {
					case GE_BLENDMODE_MUL_AND_ADD:
						prim_color_rgb = (prim_color_rgb * srccol + dst.rgb() * dstcol) / 255;
						break;
					case GE_BLENDMODE_MUL_AND_SUBTRACT:
						prim_color_rgb = (prim_color_rgb * srccol - dst.rgb() * dstcol) / 255;
						break;
					case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
						prim_color_rgb = (dst.rgb() * dstcol - prim_color_rgb * srccol) / 255;
						break;
					case GE_BLENDMODE_MIN:
						prim_color_rgb.r() = std::min(prim_color_rgb.r(), dst.r());
						prim_color_rgb.g() = std::min(prim_color_rgb.g(), dst.g());
						prim_color_rgb.b() = std::min(prim_color_rgb.b(), dst.b());
						break;
					case GE_BLENDMODE_MAX:
						prim_color_rgb.r() = std::max(prim_color_rgb.r(), dst.r());
						prim_color_rgb.g() = std::max(prim_color_rgb.g(), dst.g());
						prim_color_rgb.b() = std::max(prim_color_rgb.b(), dst.b());
						break;
					case GE_BLENDMODE_ABSDIFF:
						prim_color_rgb.r() = ::abs(prim_color_rgb.r() - dst.r());
						prim_color_rgb.g() = ::abs(prim_color_rgb.g() - dst.g());
						prim_color_rgb.b() = ::abs(prim_color_rgb.b() - dst.b());
						break;
					}
				}
				// Write the final color, together with the fragment's alpha, to the framebuffer.
				SetPixelColor(p.x, p.y, Vec4<int>(prim_color_rgb.r(), prim_color_rgb.g(), prim_color_rgb.b(), prim_color_a).ToRGBA());
			}
		}
	}
}

} // namespace