Change wrap_mode to clamp for bicubic upscaler

wrap_mode was set to 0 (wrap), even though the adjacent comment said "Clamp". The previous implementation used clamp, as do (I think) the other upscaling modes (Hybrid, etc.). Also make the upscaler's code style a little more consistent with the rest of the code.
2025-02-21 14:41:39 +00:00 · 2022-10-20 06:53:49 -04:00 · 2022-10-20 06:53:49 -04:00 · 9a01db5f42
commit 9a01db5f42
parent 9ae42a2b14
1 changed files with 23 additions and 23 deletions
--- a/GPU/Common/TextureScalerCommon.cpp
+++ b/GPU/Common/TextureScalerCommon.cpp
@ -211,10 +211,10 @@ static void load_sample(ptrdiff_t w, ptrdiff_t h, ptrdiff_t s, const u8 *pixels,
 	// equivalent to a>=0&&a<b.
 	static_assert(sizeof(ptrdiff_t) == sizeof(size_t), "Assumes ptrdiff_t same width as size_t");

-	if((size_t)x >= (size_t)w||(size_t)y >= (size_t)h) {
+	if((size_t)x >= (size_t)w || (size_t)y >= (size_t)h) {
 		switch(wrap_mode) {
 			case 0: // Wrap
-				if(!((w&(w-1))|(h&(h-1)))) {
+				if(!((w & (w-1)) | (h & (h-1)))) {
 					// Both w and h are powers of 2.
 					x &= w-1;
 					y &= h-1;
@ -239,7 +239,7 @@ static void load_sample(ptrdiff_t w, ptrdiff_t h, ptrdiff_t s, const u8 *pixels,
 				break;
 		}
 	}
-	memcpy(output, pixels+s*y+4*x, 4);
+	memcpy(output, pixels + s*y + 4*x, 4);
 }

 #define BLOCK 8
@ -260,7 +260,7 @@ static void init_block(
 	float   w00 = B/6.0f     ,  w01 = -C-0.5f*B,  w02 = 2.0f*C+0.5f*B      , w03 = -C-B/6.0f     ;
 	float   w10 = 1.0f-B/3.0f,/*w11 = 0.0f     ,*/w12 = C+2.0f*B-3.0f      , w13 = -C-1.5f*B+2.0f;
 	float   w20 = B/6.0f     ,  w21 =  C+0.5f*B,  w22 = -2.0f*C-2.5f*B+3.0f, w23 =  C+1.5f*B-2.0f;
-	float /*w30 = 0.0f       ,  w31 = 0.0f     ,*/w32 = -C                 , w33 =  C+B/6.0f      ;
+	float /*w30 = 0.0f       ,  w31 = 0.0f     ,*/w32 = -C                 , w33 =  C+B/6.0f     ;
 	// Express the sampling position as a rational
 	// number num/den-1 (off by one, so that num is
 	// always positive, since the C language does
@ -269,9 +269,9 @@ static void init_block(
 	ptrdiff_t den = 2*factor;
 	float inv_den = 1.0f/(float)den;
 	for(int dir = 0; dir < 2; ++dir) {
-		ptrdiff_t num = (dir?2*y0+1+factor:2*x0+1+factor);
-		ptrdiff_t *l = (dir?ly:lx), *l0 = (dir?ly0:lx0), *s = (dir?sy:sx);
-		float (*c)[4] = (dir?cy:cx);
+		ptrdiff_t num = (dir ? 2*y0+1+factor : 2*x0+1+factor);
+		ptrdiff_t *l = (dir ? ly : lx), *l0 = (dir ? ly0 : lx0), *s = (dir ? sy : sx);
+		float (*c)[4] = (dir ? cy : cx);
 		(*l0) = num/den-2;
 		num = num%den;
 		for(ptrdiff_t i = 0, j = 0; i < BLOCK; ++i) {
@ -288,13 +288,13 @@ static void init_block(
 		(*s) = l[BLOCK-1]+4; // Total sampled src pixels in {x|y} direction.
 	}
 	// Get a local copy of the source pixels.
-	if((*lx0) >=0 && (*ly0) >= 0 && *lx0+(*sx) <= w && *ly0+(*sy) <= h) {
+	if((*lx0) >=0 && (*ly0) >= 0 && *lx0 + (*sx) <= w && *ly0 + (*sy) <= h) {
 		for(ptrdiff_t iy = 0; iy < (*sy); ++iy)
-			memcpy(src[iy], src_pixels+src_stride*((*ly0)+iy)+4*(*lx0), (size_t)(4*(*sx)));
+			memcpy(src[iy], src_pixels+src_stride*((*ly0) + iy) + 4*(*lx0), (size_t)(4*(*sx)));
 	}
 	else {
 		for(ptrdiff_t iy = 0; iy < (*sy); ++iy) for(ptrdiff_t ix = 0; ix < (*sx); ++ix)
-			load_sample(w, h, src_stride, src_pixels, wrap_mode, (*lx0)+ix, (*ly0)+iy, src[iy]+4*ix);
+			load_sample(w, h, src_stride, src_pixels, wrap_mode, (*lx0) + ix, (*ly0) + iy, src[iy] + 4*ix);
 	}
 }

@ -315,10 +315,10 @@ static void upscale_block_c(
 	for(ptrdiff_t iy = 0; iy < sy; ++iy)
 		for(ptrdiff_t ix = 0; ix < sx; ++ix)
 			for(ptrdiff_t k = 0; k < 4; ++k)
-				buf[0][iy][ix][k] = (float)(int)src[iy][4*ix+k];
+				buf[0][iy][ix][k] = (float)(int)src[iy][4*ix + k];
 	// Horizontal pass.
 	for(ptrdiff_t ix = 0; ix < BLOCK; ++ix) {
-		#define S(i) (buf[0][iy][lx[ix]+i][k])
+		#define S(i) (buf[0][iy][lx[ix] + i][k])
 		float C0 = cx[ix][0], C1 = cx[ix][1], C2 = cx[ix][2], C3 = cx[ix][3];
 		for(ptrdiff_t iy = 0; iy < sy; ++iy)
 			for(ptrdiff_t k = 0; k < 4; ++k)
@ -342,9 +342,9 @@ static void upscale_block_c(
 				float C = buf[0][iy][ix][k];
 				if(!(C>0.0f)) C = 0.0f;
 				if(C>255.0f)  C = 255.0f;
-				pixel[k] = (u8)(int)(C+0.5f);
+				pixel[k] = (u8)(int)(C + 0.5f);
 			}
-			memcpy(dst_pixels+4*(BLOCK*iy+ix), pixel, 4);
+			memcpy(dst_pixels + 4*(BLOCK*iy + ix), pixel, 4);
 		}
 }

@ -377,7 +377,7 @@ static void upscale_block_sse2(
 	for(ptrdiff_t iy = 0; iy < sy; ++iy)
 		for(ptrdiff_t ix = 0; ix < sx; ++ix) {
 			int pixel;
-			memcpy(&pixel, src[iy]+4*ix, 4);
+			memcpy(&pixel, src[iy] + 4*ix, 4);
 			__m128i C = _mm_cvtsi32_si128(pixel);
 			C = _mm_unpacklo_epi8(C, _mm_set1_epi32(0));
 			C = _mm_unpacklo_epi8(C, _mm_set1_epi32(0));
@ -385,7 +385,7 @@ static void upscale_block_sse2(
 		}
 	// Horizontal pass.
 	for(ptrdiff_t ix = 0; ix < BLOCK; ++ix) {
-		#define S(i) (buf[0][iy][lx[ix]+i])
+		#define S(i) (buf[0][iy][lx[ix] + i])
 		__m128 C0 = _mm_set1_ps(cx[ix][0]),
 			C1 = _mm_set1_ps(cx[ix][1]),
 			C2 = _mm_set1_ps(cx[ix][2]),
@ -395,12 +395,12 @@ static void upscale_block_sse2(
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(0)), C0),
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(1)), C1),
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(2)), C2),
-						_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
+						   _mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
 		#undef S
 	}
 	// Vertical pass.
 	for(ptrdiff_t iy = 0; iy < BLOCK; ++iy) {
-		#define S(i) (buf[1][ly[iy]+i][ix])
+		#define S(i) (buf[1][ly[iy] + i][ix])
 		__m128 C0 = _mm_set1_ps(cy[iy][0]),
 			C1 = _mm_set1_ps(cy[iy][1]),
 			C2 = _mm_set1_ps(cy[iy][2]),
@ -410,7 +410,7 @@ static void upscale_block_sse2(
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(0)), C0),
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(1)), C1),
 				_mm_add_ps(_mm_mul_ps(_mm_loadu_ps(S(2)), C2),
-						_mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
+						   _mm_mul_ps(_mm_loadu_ps(S(3)), C3)))));
 		#undef S
 	}
 	// Pack destination pixels.
@ -423,7 +423,7 @@ static void upscale_block_sse2(
 			R = _mm_packus_epi16(R, R);
 			R = _mm_packus_epi16(R, R);
 			int pixel = _mm_cvtsi128_si32(R);
-			memcpy(dst_pixels+4*(BLOCK*iy+ix), &pixel, 4);
+			memcpy(dst_pixels + 4*(BLOCK*iy+ix), &pixel, 4);
 		}
 }
 #endif // defined(_M_SSE)
@ -442,7 +442,7 @@ static void upscale_cubic(
 			upscale_block_c   (width, height, src_stride_in_bytes, (const u8*)src_pixels, wrap_mode, scale, B, C, x, y, pixels);
 #endif
 			for(ptrdiff_t iy = 0, ny = (y1-y < BLOCK?y1-y:BLOCK), nx = (x1-x < BLOCK?x1-x:BLOCK); iy < ny; ++iy)
-				memcpy((u8*)dst_pixels+dst_stride_in_bytes*(y+iy)+4*x, pixels+BLOCK*4*iy, (size_t)(4*nx));
+				memcpy((u8*)dst_pixels + dst_stride_in_bytes*(y+iy) + 4*x, pixels + BLOCK*4*iy, (size_t)(4*nx));
 		}
 }

@ -450,7 +450,7 @@ static void upscale_cubic(

 void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, int u) {
 	const float B = 1.0f, C = 0.0f;
-	const int wrap_mode = 0; // Clamp
+	const int wrap_mode = 1; // Clamp
 	upscale_cubic(
 		w, h, w*4, data,
 		factor*w*4, out,
@ -460,7 +460,7 @@ void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, i

 void scaleBicubicMitchell(int factor, u32* data, u32* out, int w, int h, int l, int u) {
 	const float B = 0.0f, C = 0.5f; // Actually, Catmull-Rom
-	const int wrap_mode = 0; // Clamp
+	const int wrap_mode = 1; // Clamp
 	upscale_cubic(
 		w, h, w*4, data,
 		factor*w*4, out,