[video_core/host1x/vic] Fix a Regression in Big Brain Academy in Linux (#3412)

Patch from Lizz

Fixes out of bounds read/writes from vector implementation of the VIC decoder

Authored-by: lizzie <lizzie@eden-emu.dev>
Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/3412
Reviewed-by: MaranBr <maranbr@eden-emu.dev>
Co-authored-by: John <john@eden-emu.dev>
Co-committed-by: John <john@eden-emu.dev>
This commit is contained in:
John
2026-01-28 23:50:59 +01:00
committed by crueter
parent c974f3e0fb
commit 3b81d2e333

View File

@@ -188,7 +188,7 @@ void Vic::ReadProgressiveY8__V8U8_N420(const SlotStruct& slot, std::span<const P
out_luma_height, out_luma_stride);
slot_surface.resize_destructive(out_luma_width * out_luma_height);
if (COMPILED_HAS_SSE41 && HasSSE41()) {
if (COMPILED_HAS_SSE41 && HasSSE41() && in_luma_width % 16 == 0) {
#if COMPILED_HAS_SSE41
auto const alpha_linear = u16(slot.config.planar_alpha.Value());
auto const alpha = _mm_slli_epi64(_mm_set1_epi64x(s64(slot.config.planar_alpha.Value())), 48);
@@ -491,7 +491,7 @@ void Vic::Blend(const ConfigStruct& config, const SlotStruct& slot, VideoPixelFo
// TODO Alpha blending. No games I've seen use more than a single surface or supply an alpha
// below max, so it's ignored for now.
if (slot.color_matrix.matrix_enable) {
if (COMPILED_HAS_SSE41 && HasSSE41()) {
if (COMPILED_HAS_SSE41 && HasSSE41() && source_left % 8 == 0 && source_right % 8 == 0) {
// MSVC doesn't define __SSE4_1__
#if COMPILED_HAS_SSE41
// Fill the columns, e.g
@@ -707,7 +707,7 @@ void Vic::WriteY8__V8U8_N420(const OutputSurfaceConfig& output_surface_config) n
surface_height = (std::min)(surface_height, out_luma_height);
auto Decode = [&](u8* out_luma, u8* out_chroma) {
if (COMPILED_HAS_SSE41 && HasSSE41()) {
if (COMPILED_HAS_SSE41 && HasSSE41() && surface_width % 16 == 0) {
#if COMPILED_HAS_SSE41
// luma_mask = [00 00] [00 00] [00 00] [FF FF] [00 00] [00 00] [00 00] [FF FF]
auto const luma_mask = _mm_set_epi16(0, 0, 0, -1, 0, 0, 0, -1);
@@ -935,15 +935,14 @@ void Vic::WriteABGR(const OutputSurfaceConfig& output_surface_config, VideoPixel
surface_height = (std::min)(surface_height, out_luma_height);
auto Decode = [&](u8* out, Pixel const* inp) {
if (COMPILED_HAS_SSE41 && HasSSE41()) {
if (COMPILED_HAS_SSE41 && HasSSE41() && surface_width % 16 == 0) {
#if COMPILED_HAS_SSE41
size_t const SSE_ALIGNMENT = 16;
auto const sse_aligned_width = Common::AlignDown(surface_width, SSE_ALIGNMENT);
auto const sse_aligned_width = Common::AlignDown(surface_width, 16);
for (u32 y = 0; y < surface_height; y++) {
auto const src = y * surface_stride;
auto const dst = y * out_luma_stride;
u32 x = 0;
for (; x < sse_aligned_width; x += SSE_ALIGNMENT) {
for (; x < sse_aligned_width; x += 16) {
// Prefetch the next 2 cache lines
_mm_prefetch((const char*)&inp[src + x + 16], _MM_HINT_T0);
_mm_prefetch((const char*)&inp[src + x + 24], _MM_HINT_T0);