mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-21 14:41:39 +00:00
Change wrap_mode to clamp for bicubic upscaler
The wrap mode was set to wrap, even though the accompanying comment said clamp. The previous implementation used clamp, as do (I think) the other upscaling modes (Hybrid, etc.). Also make the upscaler's code style a little more consistent with the rest of the code.
This commit is contained in:
parent
9ae42a2b14
commit
9a01db5f42
@ -211,10 +211,10 @@ static void load_sample(ptrdiff_t w, ptrdiff_t h, ptrdiff_t s, const u8 *pixels,
|
||||
// equivalent to a>=0&&a<b.
|
||||
static_assert(sizeof(ptrdiff_t) == sizeof(size_t), "Assumes ptrdiff_t same width as size_t");
|
||||
|
||||
if((size_t)x >= (size_t)w||(size_t)y >= (size_t)h) {
|
||||
if((size_t)x >= (size_t)w || (size_t)y >= (size_t)h) {
|
||||
switch(wrap_mode) {
|
||||
case 0: // Wrap
|
||||
if(!((w&(w-1))|(h&(h-1)))) {
|
||||
if(!((w & (w-1)) | (h & (h-1)))) {
|
||||
// Both w and h are powers of 2.
|
||||
x &= w-1;
|
||||
y &= h-1;
|
||||
@ -239,7 +239,7 @@ static void load_sample(ptrdiff_t w, ptrdiff_t h, ptrdiff_t s, const u8 *pixels,
|
||||
break;
|
||||
}
|
||||
}
|
||||
memcpy(output, pixels+s*y+4*x, 4);
|
||||
memcpy(output, pixels + s*y + 4*x, 4);
|
||||
}
|
||||
|
||||
#define BLOCK 8
|
||||
@ -260,7 +260,7 @@ static void init_block(
|
||||
float w00 = B/6.0f , w01 = -C-0.5f*B, w02 = 2.0f*C+0.5f*B , w03 = -C-B/6.0f ;
|
||||
float w10 = 1.0f-B/3.0f,/*w11 = 0.0f ,*/w12 = C+2.0f*B-3.0f , w13 = -C-1.5f*B+2.0f;
|
||||
float w20 = B/6.0f , w21 = C+0.5f*B, w22 = -2.0f*C-2.5f*B+3.0f, w23 = C+1.5f*B-2.0f;
|
||||
float /*w30 = 0.0f , w31 = 0.0f ,*/w32 = -C , w33 = C+B/6.0f ;
|
||||
float /*w30 = 0.0f , w31 = 0.0f ,*/w32 = -C , w33 = C+B/6.0f ;
|
||||
// Express the sampling position as a rational
|
||||
// number num/den-1 (off by one, so that num is
|
||||
// always positive, since the C language does
|
||||
@ -269,9 +269,9 @@ static void init_block(
|
||||
ptrdiff_t den = 2*factor;
|
||||
float inv_den = 1.0f/(float)den;
|
||||
for(int dir = 0; dir < 2; ++dir) {
|
||||
ptrdiff_t num = (dir?2*y0+1+factor:2*x0+1+factor);
|
||||
ptrdiff_t *l = (dir?ly:lx), *l0 = (dir?ly0:lx0), *s = (dir?sy:sx);
|
||||
float (*c)[4] = (dir?cy:cx);
|
||||
ptrdiff_t num = (dir ? 2*y0+1+factor : 2*x0+1+factor);
|
||||
ptrdiff_t *l = (dir ? ly : lx), *l0 = (dir ? ly0 : lx0), *s = (dir ? sy : sx);
|
||||
float (*c)[4] = (dir ? cy : cx);
|
||||
(*l0) = num/den-2;
|
||||
num = num%den;
|
||||
for(ptrdiff_t i = 0, j = 0; i < BLOCK; ++i) {
|
||||
@ -288,13 +288,13 @@ static void init_block(
|
||||
(*s) = l[BLOCK-1]+4; // Total sampled src pixels in {x|y} direction.
|
||||
}
|
||||
// Get a local copy of the source pixels.
|
||||
if((*lx0) >=0 && (*ly0) >= 0 && *lx0+(*sx) <= w && *ly0+(*sy) <= h) {
|
||||
if((*lx0) >=0 && (*ly0) >= 0 && *lx0 + (*sx) <= w && *ly0 + (*sy) <= h) {
|
||||
for(ptrdiff_t iy = 0; iy < (*sy); ++iy)
|
||||
memcpy(src[iy], src_pixels+src_stride*((*ly0)+iy)+4*(*lx0), (size_t)(4*(*sx)));
|
||||
memcpy(src[iy], src_pixels+src_stride*((*ly0) + iy) + 4*(*lx0), (size_t)(4*(*sx)));
|
||||
}
|
||||
else {
|
||||
for(ptrdiff_t iy = 0; iy < (*sy); ++iy) for(ptrdiff_t ix = 0; ix < (*sx); ++ix)
|
||||
load_sample(w, h, src_stride, src_pixels, wrap_mode, (*lx0)+ix, (*ly0)+iy, src[iy]+4*ix);
|
||||
load_sample(w, h, src_stride, src_pixels, wrap_mode, (*lx0) + ix, (*ly0) + iy, src[iy] + 4*ix);
|
||||
}
|
||||
}
|
||||
|
||||
@ -315,10 +315,10 @@ static void upscale_block_c(
|
||||
for(ptrdiff_t iy = 0; iy < sy; ++iy)
|
||||
for(ptrdiff_t ix = 0; ix < sx; ++ix)
|
||||
for(ptrdiff_t k = 0; k < 4; ++k)
|
||||
buf[0][iy][ix][k] = (float)(int)src[iy][4*ix+k];
|
||||
buf[0][iy][ix][k] = (float)(int)src[iy][4*ix + k];
|
||||
// Horizontal pass.
|
||||
for(ptrdiff_t ix = 0; ix < BLOCK; ++ix) {
|
||||
#define S(i) (buf[0][iy][lx[ix]+i][k])
|
||||
#define S(i) (buf[0][iy][lx[ix] + i][k])
|
||||
float C0 = cx[ix][0], C1 = cx[ix][1], C2 = cx[ix][2], C3 = cx[ix][3];
|
||||
for(ptrdiff_t iy = 0; iy < sy; ++iy)
|
||||
for(ptrdiff_t k = 0; k < 4; ++k)
|
||||
@ -342,9 +342,9 @@ static void upscale_block_c(
|
||||
float C = buf[0][iy][ix][k];
|
||||
if(!(C>0.0f)) C = 0.0f;
|
||||
if(C>255.0f) C = 255.0f;
|
||||
pixel[k] = (u8)(int)(C+0.5f);
|
||||
pixel[k] = (u8)(int)(C + 0.5f);
|
||||
}
|
||||
memcpy(dst_pixels+4*(BLOCK*iy+ix), pixel, 4);
|
||||
memcpy(dst_pixels + 4*(BLOCK*iy + ix), pixel, 4);
|
||||
}
|
||||
}
|
||||
|
||||
@ -377,7 +377,7 @@ static void upscale_block_sse2(
|
||||
for(ptrdiff_t iy = 0; iy < sy; ++iy)
|
||||
for(ptrdiff_t ix = 0; ix < sx; ++ix) {
|
||||
int pixel;
|
||||
memcpy(&pixel, src[iy]+4*ix, 4);
|
||||
memcpy(&pixel, src[iy] + 4*ix, 4);
|
||||
__m128i C = _mm_cvtsi32_si128(pixel);
|
||||
C = _mm_unpacklo_epi8(C, _mm_set1_epi32(0));
|
||||
C = _mm_unpacklo_epi8(C, _mm_set1_epi32(0));
|
||||
@ -385,7 +385,7 @@ static void upscale_block_sse2(
|
||||
}
|
||||
// Horizontal pass.
|
||||
for(ptrdiff_t ix = 0; ix < BLOCK; ++ix) {
|
||||
#define S(i) (buf[0][iy][lx[ix]+i])
|
||||
#define S(i) (buf[0][iy][lx[ix] + i])
|
||||
__m128 C0 = _mm_set1_ps(cx[ix][0]),
|
||||
C1 = _mm_set1_ps(cx[ix][1]),
|
||||
C2 = _mm_set1_ps(cx[ix][2]),
|
||||
@ -395,12 +395,12 @@ static void upscale_block_sse2(
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(0)), C0),
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(1)), C1),
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(2)), C2),
|
||||
_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
|
||||
_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
|
||||
#undef S
|
||||
}
|
||||
// Vertical pass.
|
||||
for(ptrdiff_t iy = 0; iy < BLOCK; ++iy) {
|
||||
#define S(i) (buf[1][ly[iy]+i][ix])
|
||||
#define S(i) (buf[1][ly[iy] + i][ix])
|
||||
__m128 C0 = _mm_set1_ps(cy[iy][0]),
|
||||
C1 = _mm_set1_ps(cy[iy][1]),
|
||||
C2 = _mm_set1_ps(cy[iy][2]),
|
||||
@ -410,7 +410,7 @@ static void upscale_block_sse2(
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(0)), C0),
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(1)), C1),
|
||||
_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(2)), C2),
|
||||
_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
|
||||
_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
|
||||
#undef S
|
||||
}
|
||||
// Pack destination pixels.
|
||||
@ -423,7 +423,7 @@ static void upscale_block_sse2(
|
||||
R = _mm_packus_epi16(R, R);
|
||||
R = _mm_packus_epi16(R, R);
|
||||
int pixel = _mm_cvtsi128_si32(R);
|
||||
memcpy(dst_pixels+4*(BLOCK*iy+ix), &pixel, 4);
|
||||
memcpy(dst_pixels + 4*(BLOCK*iy+ix), &pixel, 4);
|
||||
}
|
||||
}
|
||||
#endif // defined(_M_SSE)
|
||||
@ -442,7 +442,7 @@ static void upscale_cubic(
|
||||
upscale_block_c (width, height, src_stride_in_bytes, (const u8*)src_pixels, wrap_mode, scale, B, C, x, y, pixels);
|
||||
#endif
|
||||
for(ptrdiff_t iy = 0, ny = (y1-y < BLOCK?y1-y:BLOCK), nx = (x1-x < BLOCK?x1-x:BLOCK); iy < ny; ++iy)
|
||||
memcpy((u8*)dst_pixels+dst_stride_in_bytes*(y+iy)+4*x, pixels+BLOCK*4*iy, (size_t)(4*nx));
|
||||
memcpy((u8*)dst_pixels + dst_stride_in_bytes*(y+iy) + 4*x, pixels + BLOCK*4*iy, (size_t)(4*nx));
|
||||
}
|
||||
}
|
||||
|
||||
@ -450,7 +450,7 @@ static void upscale_cubic(
|
||||
|
||||
void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, int u) {
|
||||
const float B = 1.0f, C = 0.0f;
|
||||
const int wrap_mode = 0; // Clamp
|
||||
const int wrap_mode = 1; // Clamp
|
||||
upscale_cubic(
|
||||
w, h, w*4, data,
|
||||
factor*w*4, out,
|
||||
@ -460,7 +460,7 @@ void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, i
|
||||
|
||||
void scaleBicubicMitchell(int factor, u32* data, u32* out, int w, int h, int l, int u) {
|
||||
const float B = 0.0f, C = 0.5f; // Actually, Catmull-Rom
|
||||
const int wrap_mode = 0; // Clamp
|
||||
const int wrap_mode = 1; // Clamp
|
||||
upscale_cubic(
|
||||
w, h, w*4, data,
|
||||
factor*w*4, out,
|
||||
|
Loading…
x
Reference in New Issue
Block a user