mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-15 01:19:47 +00:00
28cfbe0e5a
This is more useful to group common operations together for profiling.
603 lines
17 KiB
C++
603 lines
17 KiB
C++
// Copyright (c) 2017- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include "ppsspp_config.h"
|
|
#include <unordered_map>
|
|
#include <mutex>
|
|
#include "Common/Data/Convert/ColorConv.h"
|
|
#include "Common/StringUtils.h"
|
|
#include "Core/Config.h"
|
|
#include "Core/Reporting.h"
|
|
#include "GPU/Common/TextureDecoder.h"
|
|
#include "GPU/GPUState.h"
|
|
#include "GPU/Software/Rasterizer.h"
|
|
#include "GPU/Software/RasterizerRegCache.h"
|
|
#include "GPU/Software/Sampler.h"
|
|
|
|
#if defined(_M_SSE)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
using namespace Math3D;
|
|
using namespace Rasterizer;
|
|
|
|
extern u32 clut[4096];
|
|
|
|
namespace Sampler {
|
|
|
|
static Vec4IntResult SOFTRAST_CALL SampleNearest(int u, int v, const u8 *tptr, int bufw, int level);
|
|
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int level, int levelFrac);
|
|
|
|
std::mutex jitCacheLock;
|
|
SamplerJitCache *jitCache = nullptr;
|
|
|
|
// Allocates the global sampler jit cache.  Shutdown() releases it again.
void Init() {
	jitCache = new SamplerJitCache();
}
|
|
|
|
// Destroys the global sampler jit cache and resets the pointer to nullptr.
void Shutdown() {
	delete jitCache;
	jitCache = nullptr;
}
|
|
|
|
// Looks up a human readable description for a code address.
// Returns false (leaving name untouched) when ptr is not inside the sampler
// jit's code space; otherwise stores the description in name and returns true.
bool DescribeCodePtr(const u8 *ptr, std::string &name) {
	const bool insideJit = jitCache->IsInSpace(ptr);
	if (insideJit) {
		name = jitCache->DescribeCodePtr(ptr);
	}
	return insideJit;
}
|
|
|
|
// Returns the nearest-filter sampler for the current GPU state: the jit
// compiled function when the cache provides one, otherwise the C++ fallback.
NearestFunc GetNearestFunc() {
	SamplerID samplerID;
	jitCache->ComputeSamplerID(&samplerID, false);

	const NearestFunc compiled = jitCache->GetNearest(samplerID);
	if (!compiled) {
		return &SampleNearest;
	}
	return compiled;
}
|
|
|
|
// Returns the linear-filter sampler for the current GPU state: the jit
// compiled function when the cache provides one, otherwise the C++ fallback.
LinearFunc GetLinearFunc() {
	SamplerID samplerID;
	jitCache->ComputeSamplerID(&samplerID, true);

	const LinearFunc compiled = jitCache->GetLinear(samplerID);
	if (!compiled) {
		return &SampleLinear;
	}
	return compiled;
}
|
|
|
|
// Reserves the executable code space used for generated samplers.
SamplerJitCache::SamplerJitCache()
#if PPSSPP_ARCH(ARM64)
 : fp(this)
#endif
{
	// 256k should be enough.
	AllocCodeSpace(256 * 1024);

	// Add some random code to "help" MSVC's buggy disassembler :(
#if defined(_WIN32) && (PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)) && !PPSSPP_PLATFORM(UWP)
	using namespace Gen;
	for (int remaining = 100; remaining > 0; --remaining) {
		MOV(32, R(EAX), R(EBX));
		RET();
	}
#elif PPSSPP_ARCH(ARM)
	BKPT(0);
	BKPT(0);
#endif
}
|
|
|
|
void SamplerJitCache::Clear() {
|
|
ClearCodeSpace(0);
|
|
cache_.clear();
|
|
addresses_.clear();
|
|
}
|
|
|
|
// Builds the SamplerID describing the current texture related GPU state.
//   id_out - receives the finished ID.
//   linear - stored in the ID's linear flag.
void SamplerJitCache::ComputeSamplerID(SamplerID *id_out, bool linear) {
	SamplerID key{};

	// Walk the mip chain.  Both flags start out true and are cleared as soon
	// as one level does not match.
	key.useStandardBufw = true;
	key.hasStandardMips = true;
	const int lastLevel = gstate.isMipmapEnabled() ? gstate.getTextureMaxLevel() : 0;
	int prevWidth = -1;
	for (int level = 0; level <= lastLevel; ++level) {
		if (gstate.getTextureAddress(level) == 0)
			key.hasInvalidPtr = true;

		const int levelWidth = gstate.getTextureWidth(level);
		// Buffer width differs from the texture width at this level.
		if (levelWidth != (gstate.texbufwidth[level] & 0x00001FFF))
			key.useStandardBufw = false;
		// Each level is expected to be half as wide as the previous one.
		if (prevWidth != -1 && prevWidth != levelWidth * 2)
			key.hasStandardMips = false;
		prevWidth = levelWidth;
	}
	key.hasAnyMips = lastLevel != 0;

	const GETextureFormat format = gstate.getTextureFormat();
	key.texfmt = format;
	key.swizzle = gstate.isTextureSwizzled();
	// Only CLUT4 can use separate CLUTs per mipmap.
	key.useSharedClut = format != GE_TFMT_CLUT4 || lastLevel == 0 || !gstate.isMipmapEnabled() || gstate.isClutSharedForMipmaps();
	if (gstate.isTextureFormatIndexed()) {
		key.clutfmt = gstate.getClutPaletteFormat();
		key.hasClutMask = gstate.getClutIndexMask() != 0xFF;
		key.hasClutShift = gstate.getClutIndexShift() != 0;
		key.hasClutOffset = gstate.getClutIndexStartPos() != 0;
	}
	key.linear = linear;

	key.clampS = gstate.isTexCoordClampedS();
	key.clampT = gstate.isTexCoordClampedT();
	// Level 0 dimensions are kept as shift amounts taken straight from texsize[0].
	key.width0Shift = gstate.texsize[0] & 0xF;
	key.height0Shift = (gstate.texsize[0] >> 8) & 0xF;

	key.useTextureAlpha = gstate.isTextureAlphaUsed();
	key.useColorDoubling = gstate.isColorDoublingEnabled();
	key.texFunc = gstate.getTextureFunction();
	// Anything above ADD is folded into ADD.
	if (key.texFunc > GE_TEXFUNC_ADD)
		key.texFunc = GE_TEXFUNC_ADD;

	*id_out = key;
}
|
|
|
|
std::string SamplerJitCache::DescribeSamplerID(const SamplerID &id) {
|
|
std::string name;
|
|
switch (id.TexFmt()) {
|
|
case GE_TFMT_5650: name = "5650"; break;
|
|
case GE_TFMT_5551: name = "5551"; break;
|
|
case GE_TFMT_4444: name = "4444"; break;
|
|
case GE_TFMT_8888: name = "8888"; break;
|
|
case GE_TFMT_CLUT4: name = "CLUT4"; break;
|
|
case GE_TFMT_CLUT8: name = "CLUT8"; break;
|
|
case GE_TFMT_CLUT16: name = "CLUT16"; break;
|
|
case GE_TFMT_CLUT32: name = "CLUT32"; break;
|
|
case GE_TFMT_DXT1: name = "DXT1"; break;
|
|
case GE_TFMT_DXT3: name = "DXT3"; break;
|
|
case GE_TFMT_DXT5: name = "DXT5"; break;
|
|
}
|
|
switch (id.ClutFmt()) {
|
|
case GE_CMODE_16BIT_BGR5650:
|
|
switch (id.TexFmt()) {
|
|
case GE_TFMT_CLUT4:
|
|
case GE_TFMT_CLUT8:
|
|
case GE_TFMT_CLUT16:
|
|
case GE_TFMT_CLUT32:
|
|
name += ":C5650";
|
|
break;
|
|
default:
|
|
// Ignore 0 clutfmt when no clut.
|
|
break;
|
|
}
|
|
break;
|
|
case GE_CMODE_16BIT_ABGR5551: name += ":C5551"; break;
|
|
case GE_CMODE_16BIT_ABGR4444: name += ":C4444"; break;
|
|
case GE_CMODE_32BIT_ABGR8888: name += ":C8888"; break;
|
|
}
|
|
if (id.swizzle) {
|
|
name += ":SWZ";
|
|
}
|
|
if (!id.useSharedClut) {
|
|
name += ":CMIP";
|
|
}
|
|
if (id.hasInvalidPtr) {
|
|
name += ":INV";
|
|
}
|
|
if (id.hasClutMask) {
|
|
name += ":CMASK";
|
|
}
|
|
if (id.hasClutShift) {
|
|
name += ":CSHF";
|
|
}
|
|
if (id.hasClutOffset) {
|
|
name += ":COFF";
|
|
}
|
|
if (id.clampS || id.clampT) {
|
|
name += std::string(":CL") + (id.clampS ? "S" : "") + (id.clampT ? "T" : "");
|
|
}
|
|
if (!id.useStandardBufw) {
|
|
name += ":BUFW";
|
|
}
|
|
if (!id.hasStandardMips) {
|
|
name += ":XMIP";
|
|
} else if (id.hasAnyMips) {
|
|
name += ":MIP";
|
|
}
|
|
if (id.linear) {
|
|
name += ":LERP";
|
|
}
|
|
if (id.useTextureAlpha) {
|
|
name += ":A";
|
|
}
|
|
if (id.useColorDoubling) {
|
|
name += ":DBL";
|
|
}
|
|
switch (id.texFunc) {
|
|
case GE_TEXFUNC_MODULATE:
|
|
name += ":MOD";
|
|
break;
|
|
case GE_TEXFUNC_DECAL:
|
|
name += ":DECAL";
|
|
break;
|
|
case GE_TEXFUNC_BLEND:
|
|
name += ":BLEND";
|
|
break;
|
|
case GE_TEXFUNC_REPLACE:
|
|
break;
|
|
case GE_TEXFUNC_ADD:
|
|
name += ":ADD";
|
|
default:
|
|
break;
|
|
}
|
|
name += StringFromFormat(":W%dH%d", 1 << id.width0Shift, 1 << id.height0Shift);
|
|
|
|
return name;
|
|
}
|
|
|
|
// Attaches message to the current code emission address, so that
// DescribeCodePtr() can later label code emitted from here on.
void SamplerJitCache::Describe(const std::string &message) {
	const auto emitAddress = GetCodePointer();
	descriptions_[emitAddress] = message;
}
|
|
|
|
std::string SamplerJitCache::DescribeCodePtr(const u8 *ptr) {
|
|
constexpr bool USE_IDS = false;
|
|
ptrdiff_t dist = 0x7FFFFFFF;
|
|
if (USE_IDS) {
|
|
SamplerID found{};
|
|
for (const auto &it : addresses_) {
|
|
ptrdiff_t it_dist = ptr - it.second;
|
|
if (it_dist >= 0 && it_dist < dist) {
|
|
found = it.first;
|
|
dist = it_dist;
|
|
}
|
|
}
|
|
|
|
return DescribeSamplerID(found);
|
|
} else {
|
|
std::string found;
|
|
for (const auto &it : descriptions_) {
|
|
ptrdiff_t it_dist = ptr - it.first;
|
|
if (it_dist >= 0 && it_dist < dist) {
|
|
found = it.second;
|
|
dist = it_dist;
|
|
}
|
|
}
|
|
return found;
|
|
}
|
|
}
|
|
|
|
// Returns the jit compiled nearest sampler for id, compiling it on first use.
// Returns nullptr when jit is unavailable on this build or disabled in the
// config; callers then fall back to the C++ sampler.
NearestFunc SamplerJitCache::GetNearest(const SamplerID &id) {
	std::lock_guard<std::mutex> guard(jitCacheLock);

	const auto cached = cache_.find(id);
	if (cached != cache_.end())
		return cached->second;

	// TODO: What should be the min size? Can we even hit this?
	if (GetSpaceLeft() < 16384)
		Clear();

#if PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(UWP)
	if (g_Config.bSoftwareRenderingJit) {
		// Remember where this sampler's code begins before emitting it.
		addresses_[id] = GetCodePointer();
		const NearestFunc compiled = Compile(id);
		cache_[id] = compiled;
		return compiled;
	}
#endif
	return nullptr;
}
|
|
|
|
// Returns the jit compiled linear sampler for id, compiling it on first use.
// Linear samplers share cache_ with nearest samplers, so they are stored as
// NearestFunc and cast back on the way out.
// Returns nullptr when jit is unavailable on this build or disabled in the config.
LinearFunc SamplerJitCache::GetLinear(const SamplerID &id) {
	std::lock_guard<std::mutex> guard(jitCacheLock);

	const auto cached = cache_.find(id);
	if (cached != cache_.end())
		return reinterpret_cast<LinearFunc>(cached->second);

	// TODO: What should be the min size? Can we even hit this?
	if (GetSpaceLeft() < 16384)
		Clear();

#if PPSSPP_ARCH(AMD64) && !PPSSPP_PLATFORM(UWP)
	if (g_Config.bSoftwareRenderingJit) {
		// Remember where this sampler's code begins before emitting it.
		addresses_[id] = GetCodePointer();
		const LinearFunc compiled = CompileLinear(id);
		cache_[id] = reinterpret_cast<NearestFunc>(compiled);
		return compiled;
	}
#endif
	return nullptr;
}
|
|
|
|
template <unsigned int texel_size_bits>
|
|
static inline int GetPixelDataOffset(unsigned int row_pitch_pixels, unsigned int u, unsigned int v)
|
|
{
|
|
if (!gstate.isTextureSwizzled())
|
|
return (v * (row_pitch_pixels * texel_size_bits >> 3)) + (u * texel_size_bits >> 3);
|
|
|
|
const int tile_size_bits = 32;
|
|
const int tiles_in_block_horizontal = 4;
|
|
const int tiles_in_block_vertical = 8;
|
|
|
|
int texels_per_tile = tile_size_bits / texel_size_bits;
|
|
int tile_u = u / texels_per_tile;
|
|
int tile_idx = (v % tiles_in_block_vertical) * (tiles_in_block_horizontal) +
|
|
// TODO: not sure if the *texel_size_bits/8 factor is correct
|
|
(v / tiles_in_block_vertical) * ((row_pitch_pixels*texel_size_bits/(tile_size_bits))*tiles_in_block_vertical) +
|
|
(tile_u % tiles_in_block_horizontal) +
|
|
(tile_u / tiles_in_block_horizontal) * (tiles_in_block_horizontal*tiles_in_block_vertical);
|
|
|
|
return tile_idx * (tile_size_bits / 8) + ((u % texels_per_tile) * texel_size_bits) / 8;
|
|
}
|
|
|
|
// Reads palette entry `index` from the global clut and converts it to
// RGBA8888 according to the current palette format.  When mipmaps do not
// share the CLUT, each level is offset by 16 entries.
// Logs an error and returns 0 for an unknown palette format.
static inline u32 LookupColor(unsigned int index, unsigned int level)
{
	const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
	const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
	const unsigned int entry = index + clutSharingOffset;

	const auto paletteFormat = gstate.getClutPaletteFormat();
	// The 16-bit formats view the same storage as an array of u16.
	const u16 *clut16 = reinterpret_cast<const u16 *>(clut);

	switch (paletteFormat) {
	case GE_CMODE_16BIT_BGR5650:
		return RGB565ToRGBA8888(clut16[entry]);

	case GE_CMODE_16BIT_ABGR5551:
		return RGBA5551ToRGBA8888(clut16[entry]);

	case GE_CMODE_16BIT_ABGR4444:
		return RGBA4444ToRGBA8888(clut16[entry]);

	case GE_CMODE_32BIT_ABGR8888:
		return clut[entry];

	default:
		ERROR_LOG_REPORT(G3D, "Software: Unsupported palette format: %x", paletteFormat);
		return 0;
	}
}
|
|
|
|
struct Nearest4 {
|
|
alignas(16) u32 v[4];
|
|
|
|
operator u32() const {
|
|
return v[0];
|
|
}
|
|
};
|
|
|
|
template <int N>
|
|
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level) {
|
|
Nearest4 res;
|
|
if (!srcptr) {
|
|
memset(res.v, 0, sizeof(res.v));
|
|
return res;
|
|
}
|
|
|
|
GETextureFormat texfmt = gstate.getTextureFormat();
|
|
|
|
// TODO: Should probably check if textures are aligned properly...
|
|
|
|
switch (texfmt) {
|
|
case GE_TFMT_4444:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
|
res.v[i] = RGBA4444ToRGBA8888(*(const u16 *)src);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_5551:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
|
res.v[i] = RGBA5551ToRGBA8888(*(const u16 *)src);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_5650:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
|
res.v[i] = RGB565ToRGBA8888(*(const u16 *)src);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_8888:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
|
|
res.v[i] = *(const u32 *)src;
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_CLUT32:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
|
|
u32 val = src[0] + (src[1] << 8) + (src[2] << 16) + (src[3] << 24);
|
|
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_CLUT16:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
|
u16 val = src[0] + (src[1] << 8);
|
|
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_CLUT8:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<8>(texbufw, u[i], v[i]);
|
|
u8 val = *src;
|
|
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_CLUT4:
|
|
for (int i = 0; i < N; ++i) {
|
|
const u8 *src = srcptr + GetPixelDataOffset<4>(texbufw, u[i], v[i]);
|
|
u8 val = (u[i] & 1) ? (src[0] >> 4) : (src[0] & 0xF);
|
|
// Only CLUT4 uses separate mipmap palettes.
|
|
res.v[i] = LookupColor(gstate.transformClutIndex(val), level);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_DXT1:
|
|
for (int i = 0; i < N; ++i) {
|
|
const DXT1Block *block = (const DXT1Block *)srcptr + (v[i] / 4) * (texbufw / 4) + (u[i] / 4);
|
|
res.v[i] = GetDXT1Texel(block, u[i] % 4, v[i] % 4);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_DXT3:
|
|
for (int i = 0; i < N; ++i) {
|
|
const DXT3Block *block = (const DXT3Block *)srcptr + (v[i] / 4) * (texbufw / 4) + (u[i] / 4);
|
|
res.v[i] = GetDXT3Texel(block, u[i] % 4, v[i] % 4);
|
|
}
|
|
return res;
|
|
|
|
case GE_TFMT_DXT5:
|
|
for (int i = 0; i < N; ++i) {
|
|
const DXT5Block *block = (const DXT5Block *)srcptr + (v[i] / 4) * (texbufw / 4) + (u[i] / 4);
|
|
res.v[i] = GetDXT5Texel(block, u[i] % 4, v[i] % 4);
|
|
}
|
|
return res;
|
|
|
|
default:
|
|
ERROR_LOG_REPORT(G3D, "Software: Unsupported texture format: %x", texfmt);
|
|
memset(res.v, 0, sizeof(res.v));
|
|
return res;
|
|
}
|
|
}
|
|
|
|
// Non-jit nearest sampler: fetches the single texel at (u, v) and returns it
// unpacked into four integer channels.
static Vec4IntResult SOFTRAST_CALL SampleNearest(int u, int v, const u8 *tptr, int bufw, int level) {
	const Nearest4 texel = SampleNearest<1>(&u, &v, tptr, bufw, level);
	const Vec4<int> rgba = Vec4<int>::FromRGBA(texel.v[0]);
	return ToVec4IntResult(rgba);
}
|
|
|
|
// Clamps a texel coordinate into [0, height - 1].  The upper bound is checked
// first, so it wins when both conditions would apply.
static inline int ClampUV(int v, int height) {
	const int maxCoord = height - 1;
	if (v >= maxCoord)
		return maxCoord;
	return v < 0 ? 0 : v;
}
|
|
|
|
// Wraps a texel coordinate by masking it with (height - 1).
static inline int WrapUV(int v, int height) {
	const int mask = height - 1;
	return v & mask;
}
|
|
|
|
// Applies clamp or wrap addressing to four texel coordinates at once.
//   clamp - true: limit each lane to [0, width - 1]; false: mask with (width - 1).
//   vec   - the four coordinates.
//   width - texture extent along the axis being processed.
static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuad(bool clamp, Vec4IntArg vec, int width) {
	Vec4<int> coords = vec;
#ifdef _M_SSE
	const __m128i maxCoord = _mm_set1_epi32(width - 1);
	if (!clamp) {
		coords.ivec = _mm_and_si128(coords.ivec, maxCoord);
	} else {
		// First, clamp to zero.
		const __m128i isNegative = _mm_cmpgt_epi32(_mm_setzero_si128(), coords.ivec);
		coords.ivec = _mm_andnot_si128(isNegative, coords.ivec);

		// Now the high bound: keep lanes below it, replace the others with the bound.
		const __m128i isBelowMax = _mm_cmpgt_epi32(maxCoord, coords.ivec);
		const __m128i kept = _mm_and_si128(isBelowMax, coords.ivec);
		coords.ivec = _mm_or_si128(kept, _mm_andnot_si128(isBelowMax, maxCoord));
	}
#else
	for (int lane = 0; lane < 4; ++lane) {
		coords[lane] = clamp ? ClampUV(coords[lane], width) : WrapUV(coords[lane], width);
	}
#endif

	return ToVec4IntResult(coords);
}
|
|
|
|
// Builds the horizontal coordinates of a 2x2 sample quad (u, u+1, u, u+1)
// and applies clamp/wrap addressing to them.
static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuadS(bool clamp, int u, int width) {
#ifdef _M_SSE
	// Lane order 0..3, i.e. offsets 0, 1, 0, 1.
	const __m128i offsets = _mm_setr_epi32(0, 1, 0, 1);
	const __m128i quad = _mm_add_epi32(_mm_set1_epi32(u), offsets);
	return ApplyTexelClampQuad(clamp, quad, width);
#else
	const Vec4<int> quad = Vec4<int>::AssignToAll(u) + Vec4<int>(0, 1, 0, 1);
	return ApplyTexelClampQuad(clamp, ToVec4IntArg(quad), width);
#endif
}
|
|
|
|
// Builds the vertical coordinates of a 2x2 sample quad (v, v, v+1, v+1)
// and applies clamp/wrap addressing to them.
static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuadT(bool clamp, int v, int height) {
#ifdef _M_SSE
	// Lane order 0..3, i.e. offsets 0, 0, 1, 1.
	const __m128i offsets = _mm_setr_epi32(0, 0, 1, 1);
	const __m128i quad = _mm_add_epi32(_mm_set1_epi32(v), offsets);
	return ApplyTexelClampQuad(clamp, quad, height);
#else
	const Vec4<int> quad = Vec4<int>::AssignToAll(v) + Vec4<int>(0, 0, 1, 1);
	return ApplyTexelClampQuad(clamp, ToVec4IntArg(quad), height);
#endif
}
|
|
|
|
// Converts texture coordinate in_s into the four horizontal texel coordinates
// of a bilinear quad for the given level.
//   frac_u - receives the 4-bit horizontal blend fraction.
//   x      - subtracted from the 24.8 fixed point coordinate before it is split.
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, float in_s, int &frac_u, int x) {
	const int width = gstate.getTextureWidth(level);

	// 24.8 fixed point texel position, shifted back by half a texel (128).
	const int fixed_u = static_cast<int>(in_s * width * 256) + 12 - x - 128;
	frac_u = (fixed_u >> 4) & 0x0F;
	const int texel_u = fixed_u >> 8;

	// Need to generate and individually wrap/clamp the four sample coordinates. Ugh.
	return ApplyTexelClampQuadS(gstate.isTexCoordClampedS(), texel_u, width);
}
|
|
|
|
// Converts texture coordinate in_t into the four vertical texel coordinates
// of a bilinear quad for the given level.
//   frac_v - receives the 4-bit vertical blend fraction.
//   y      - subtracted from the 24.8 fixed point coordinate before it is split.
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, float in_t, int &frac_v, int y) {
	const int height = gstate.getTextureHeight(level);

	// 24.8 fixed point texel position, shifted back by half a texel (128).
	const int fixed_v = static_cast<int>(in_t * height * 256) + 12 - y - 128;
	frac_v = (fixed_v >> 4) & 0x0F;
	const int texel_v = fixed_v >> 8;

	// Need to generate and individually wrap/clamp the four sample coordinates. Ugh.
	return ApplyTexelClampQuadT(gstate.isTexCoordClampedT(), texel_v, height);
}
|
|
|
|
// Bilinearly filters one mip level: fetches the 2x2 texel quad around (s, t)
// and blends it using 4-bit horizontal and vertical fractions.
// Only the first entries of tptr and bufw are read.
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 **tptr, const int *bufw, int texlevel) {
	int frac_u, frac_v;
	const Vec4<int> quadU = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x);
	const Vec4<int> quadV = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y);
	const Nearest4 texels = SampleNearest<4>(quadU.AsArray(), quadV.AsArray(), tptr[0], bufw[0], texlevel);

	// Quad order: top-left, top-right, bottom-left, bottom-right.
	const Vec4<int> topLeft = Vec4<int>::FromRGBA(texels.v[0]);
	const Vec4<int> topRight = Vec4<int>::FromRGBA(texels.v[1]);
	const Vec4<int> bottomLeft = Vec4<int>::FromRGBA(texels.v[2]);
	const Vec4<int> bottomRight = Vec4<int>::FromRGBA(texels.v[3]);

	// Weights are in sixteenths; the two passes together scale by 16 * 16.
	const int weightLeft = 0x10 - frac_u;
	const int weightTop = 0x10 - frac_v;
	const Vec4<int> topRow = topLeft * weightLeft + topRight * frac_u;
	const Vec4<int> bottomRow = bottomLeft * weightLeft + bottomRight * frac_u;
	const Vec4<int> blended = (topRow * weightTop + bottomRow * frac_v) / (16 * 16);
	return ToVec4IntResult(blended);
}
|
|
|
|
// Non-jit linear sampler.  Filters level texlevel and, when levelFrac is
// non-zero, blends in the next level with weight levelFrac / 16.  The result
// is combined with prim_color through the texture function.
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 **tptr, const int *bufw, int texlevel, int levelFrac) {
	Vec4<int> texel = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel);
	if (levelFrac != 0) {
		// The next level's pointer and buffer width follow in the arrays.
		const Vec4<int> nextLevel = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1);
		texel = (nextLevel * levelFrac + texel * (16 - levelFrac)) / 16;
	}
	return GetTextureFunctionOutput(prim_color, ToVec4IntArg(texel));
}
|
|
|
|
};
|