Timers: Work around possible MSVC codegen bug
Some checks are pending
Create rolling release / Windows x64 Build (push) Waiting to run
Create rolling release / Windows x64 SSE2 Build (push) Waiting to run
Create rolling release / Windows ARM64 Build (push) Waiting to run
Create rolling release / Linux x64 AppImage (push) Waiting to run
Create rolling release / Linux x64 SSE2 AppImage (push) Waiting to run
Create rolling release / Linux Flatpak Build (push) Waiting to run
Create rolling release / MacOS Universal Build (push) Waiting to run
Create rolling release / Create Release (push) Blocked by required conditions

Fixes very strange behaviour in games in MSVC LTCG build.
This commit is contained in:
Stenzek 2024-11-04 22:18:58 +10:00
parent 4c1377774d
commit ca2ed61dba
No known key found for this signature in database
2 changed files with 36 additions and 12 deletions

View File

@ -2,14 +2,10 @@
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
#pragma once
#include "types.h"
#include <type_traits>
// Disable MSVC warnings that we actually handle
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4800) // warning C4800: 'int': forcing value to bool 'true' or 'false' (performance warning)
#endif
#include "types.h"
#include <type_traits>
template<typename BackingDataType, typename DataType, unsigned BitIndex, unsigned BitCount>
struct BitField
@ -138,7 +134,3 @@ struct BitField
BackingDataType data;
};
#ifdef _MSC_VER
#pragma warning(pop)
#endif

View File

@ -391,7 +391,39 @@ void Timers::WriteRegister(u32 offset, u32 value)
DEBUG_LOG("Timer {} write mode register 0x{:04X}", timer_index, value);
cs.mode.bits = (value & WRITE_MASK) | (cs.mode.bits & ~WRITE_MASK);
cs.use_external_clock = (cs.mode.clock_source & (timer_index == 2 ? 2 : 1)) != 0;
// Why is this extra assignment here? MSVC compiler bugs it seems.
// Without the copy in the local variable, when compiling with LTCG, it generates:
//
// 00007FF7BAF721E1 41 C1 EA 08 shr r10d,8
// 00007FF7BAF721E5 41 83 E2 01 and r10d,1
// 00007FF7BAF721E9 0F 95 C0 setne al
//
// and without LTCG, or with the copy:
//
// 00007FF7C1859732 C1 EA 08 shr edx,8
// 00007FF7C1859735 83 FB 02 cmp ebx,2
// 00007FF7C1859738 0F 94 C0 sete al
// 00007FF7C185973B FF C0 inc eax
// 00007FF7C185973D 84 D0 test al,dl
// 00007FF7C185973F 0F 95 C0 setne al
//
// In other words, we get:
// use_external_clock = (clock_source & 1) != 0
// and
// use_external_clock = (clock_source & ((timer_index == 2) + 1)) != 0
//
// The entire "timer_index == 2" sub-expression is ignored. Furthermore, the
// and r10d, 1; setne al sequence doesn't make sense to me either, since r10d & 1 is going
// to always be equal to !ZF...
//
// I can't seem to make a minimal repro case for it, and if you do _anything_ inbetween
// the statements (e.g. printf), it doesn't generate the bad code. But at least this works
// around it for now.
//
const u8 clock_source = cs.mode.clock_source;
cs.use_external_clock = (clock_source & (timer_index == 2 ? 2 : 1)) != 0;
cs.counter = 0;
cs.irq_done = false;
InterruptController::SetLineState(