mirror of https://git.uzuy-edge.org/Uzuy-Edge/Uzuy
synced 2024-11-23 11:09:47 +00:00
chore: adjust NVN bias offset range for broader memory alignment
- Updated the `nvn_bias` structure to extend the offset range.
- Adjusted `offset_begin` from 0x110 to 0x100 and `offset_end` from 0x610 to 0x700.
- These changes provide a broader range for better memory alignment and handling.
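For reference, the change boils down to the following initializer inside CollectStorageBuffers() (a sketch assembled from the diff below; only the two offsets and the added comments differ, while index and alignment stay the same):

    // NVN bias window used when searching constant buffers for storage buffer descriptors
    static constexpr Bias nvn_bias{
        .index = 0,
        .offset_begin = 0x100, // previously 0x110
        .offset_end = 0x700,   // previously 0x610
        .alignment = 16,
    };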
This commit is contained in:
parent c55a39d199
commit bac46e649e
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: Copyright 2021 uzuy Emulator Project
+// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include <optional>

@@ -15,509 +15,509 @@
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
namespace {
|
||||
/// Address in constant buffers to the storage buffer descriptor
|
||||
struct StorageBufferAddr {
|
||||
auto operator<=>(const StorageBufferAddr&) const noexcept = default;
|
||||
struct StorageBufferAddr {
|
||||
auto operator<=>(const StorageBufferAddr&) const noexcept = default;
|
||||
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
/// Block iterator to a global memory instruction and the storage buffer it uses
|
||||
struct StorageInst {
|
||||
StorageBufferAddr storage_buffer;
|
||||
IR::Inst* inst;
|
||||
IR::Block* block;
|
||||
};
|
||||
struct StorageInst {
|
||||
StorageBufferAddr storage_buffer;
|
||||
IR::Inst* inst;
|
||||
IR::Block* block;
|
||||
};
|
||||
|
||||
/// Bias towards a certain range of constant buffers when looking for storage buffers
|
||||
struct Bias {
|
||||
u32 index;
|
||||
u32 offset_begin;
|
||||
u32 offset_end;
|
||||
u32 alignment;
|
||||
};
|
||||
struct Bias {
|
||||
u32 index;
|
||||
u32 offset_begin;
|
||||
u32 offset_end;
|
||||
u32 alignment;
|
||||
};
|
||||
|
||||
using boost::container::flat_set;
|
||||
using boost::container::small_vector;
|
||||
using StorageBufferSet =
|
||||
flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
|
||||
using StorageInstVector = small_vector<StorageInst, 24>;
|
||||
using StorageWritesSet =
|
||||
flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
|
||||
using boost::container::flat_set;
|
||||
using boost::container::small_vector;
|
||||
using StorageBufferSet =
|
||||
flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
|
||||
using StorageInstVector = small_vector<StorageInst, 24>;
|
||||
using StorageWritesSet =
|
||||
flat_set<StorageBufferAddr, std::less<StorageBufferAddr>, small_vector<StorageBufferAddr, 16>>;
|
||||
|
||||
struct StorageInfo {
|
||||
StorageBufferSet set;
|
||||
StorageInstVector to_replace;
|
||||
StorageWritesSet writes;
|
||||
};
|
||||
struct StorageInfo {
|
||||
StorageBufferSet set;
|
||||
StorageInstVector to_replace;
|
||||
StorageWritesSet writes;
|
||||
};
|
||||
|
||||
/// Returns true when the instruction is a global memory instruction
bool IsGlobalMemory(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadGlobalS8:
    case IR::Opcode::LoadGlobalU8:
    case IR::Opcode::LoadGlobalS16:
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobal32:
    case IR::Opcode::LoadGlobal64:
    case IR::Opcode::LoadGlobal128:
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return true;
    default:
        return false;
    }
}

/// Returns true when the instruction is a global memory instruction
bool IsGlobalMemoryWrite(const IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return true;
    default:
        return false;
    }
}

/// Converts a global memory opcode to its storage buffer equivalent
IR::Opcode GlobalToStorage(IR::Opcode opcode) {
    switch (opcode) {
    case IR::Opcode::LoadGlobalS8:
        return IR::Opcode::LoadStorageS8;
    case IR::Opcode::LoadGlobalU8:
        return IR::Opcode::LoadStorageU8;
    case IR::Opcode::LoadGlobalS16:
        return IR::Opcode::LoadStorageS16;
    case IR::Opcode::LoadGlobalU16:
        return IR::Opcode::LoadStorageU16;
    case IR::Opcode::LoadGlobal32:
        return IR::Opcode::LoadStorage32;
    case IR::Opcode::LoadGlobal64:
        return IR::Opcode::LoadStorage64;
    case IR::Opcode::LoadGlobal128:
        return IR::Opcode::LoadStorage128;
    case IR::Opcode::WriteGlobalS8:
        return IR::Opcode::WriteStorageS8;
    case IR::Opcode::WriteGlobalU8:
        return IR::Opcode::WriteStorageU8;
    case IR::Opcode::WriteGlobalS16:
        return IR::Opcode::WriteStorageS16;
    case IR::Opcode::WriteGlobalU16:
        return IR::Opcode::WriteStorageU16;
    case IR::Opcode::WriteGlobal32:
        return IR::Opcode::WriteStorage32;
    case IR::Opcode::WriteGlobal64:
        return IR::Opcode::WriteStorage64;
    case IR::Opcode::WriteGlobal128:
        return IR::Opcode::WriteStorage128;
    case IR::Opcode::GlobalAtomicIAdd32:
        return IR::Opcode::StorageAtomicIAdd32;
    case IR::Opcode::GlobalAtomicSMin32:
        return IR::Opcode::StorageAtomicSMin32;
    case IR::Opcode::GlobalAtomicUMin32:
        return IR::Opcode::StorageAtomicUMin32;
    case IR::Opcode::GlobalAtomicSMax32:
        return IR::Opcode::StorageAtomicSMax32;
    case IR::Opcode::GlobalAtomicUMax32:
        return IR::Opcode::StorageAtomicUMax32;
    case IR::Opcode::GlobalAtomicInc32:
        return IR::Opcode::StorageAtomicInc32;
    case IR::Opcode::GlobalAtomicDec32:
        return IR::Opcode::StorageAtomicDec32;
    case IR::Opcode::GlobalAtomicAnd32:
        return IR::Opcode::StorageAtomicAnd32;
    case IR::Opcode::GlobalAtomicOr32:
        return IR::Opcode::StorageAtomicOr32;
    case IR::Opcode::GlobalAtomicXor32:
        return IR::Opcode::StorageAtomicXor32;
    case IR::Opcode::GlobalAtomicExchange32:
        return IR::Opcode::StorageAtomicExchange32;
    case IR::Opcode::GlobalAtomicIAdd64:
        return IR::Opcode::StorageAtomicIAdd64;
    case IR::Opcode::GlobalAtomicSMin64:
        return IR::Opcode::StorageAtomicSMin64;
    case IR::Opcode::GlobalAtomicUMin64:
        return IR::Opcode::StorageAtomicUMin64;
    case IR::Opcode::GlobalAtomicSMax64:
        return IR::Opcode::StorageAtomicSMax64;
    case IR::Opcode::GlobalAtomicUMax64:
        return IR::Opcode::StorageAtomicUMax64;
    case IR::Opcode::GlobalAtomicAnd64:
        return IR::Opcode::StorageAtomicAnd64;
    case IR::Opcode::GlobalAtomicOr64:
        return IR::Opcode::StorageAtomicOr64;
    case IR::Opcode::GlobalAtomicXor64:
        return IR::Opcode::StorageAtomicXor64;
    case IR::Opcode::GlobalAtomicExchange64:
        return IR::Opcode::StorageAtomicExchange64;
    case IR::Opcode::GlobalAtomicIAdd32x2:
        return IR::Opcode::StorageAtomicIAdd32x2;
    case IR::Opcode::GlobalAtomicSMin32x2:
        return IR::Opcode::StorageAtomicSMin32x2;
    case IR::Opcode::GlobalAtomicUMin32x2:
        return IR::Opcode::StorageAtomicUMin32x2;
    case IR::Opcode::GlobalAtomicSMax32x2:
        return IR::Opcode::StorageAtomicSMax32x2;
    case IR::Opcode::GlobalAtomicUMax32x2:
        return IR::Opcode::StorageAtomicUMax32x2;
    case IR::Opcode::GlobalAtomicAnd32x2:
        return IR::Opcode::StorageAtomicAnd32x2;
    case IR::Opcode::GlobalAtomicOr32x2:
        return IR::Opcode::StorageAtomicOr32x2;
    case IR::Opcode::GlobalAtomicXor32x2:
        return IR::Opcode::StorageAtomicXor32x2;
    case IR::Opcode::GlobalAtomicExchange32x2:
        return IR::Opcode::StorageAtomicExchange32x2;
    case IR::Opcode::GlobalAtomicAddF32:
        return IR::Opcode::StorageAtomicAddF32;
    case IR::Opcode::GlobalAtomicAddF16x2:
        return IR::Opcode::StorageAtomicAddF16x2;
    case IR::Opcode::GlobalAtomicMinF16x2:
        return IR::Opcode::StorageAtomicMinF16x2;
    case IR::Opcode::GlobalAtomicMaxF16x2:
        return IR::Opcode::StorageAtomicMaxF16x2;
    case IR::Opcode::GlobalAtomicAddF32x2:
        return IR::Opcode::StorageAtomicAddF32x2;
    case IR::Opcode::GlobalAtomicMinF32x2:
        return IR::Opcode::StorageAtomicMinF32x2;
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return IR::Opcode::StorageAtomicMaxF32x2;
    default:
        throw InvalidArgument("Invalid global memory opcode {}", opcode);
    }
}

/// Returns true when a storage buffer address satisfies a bias
bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexcept {
    return storage_buffer.index == bias.index && storage_buffer.offset >= bias.offset_begin &&
           storage_buffer.offset < bias.offset_end;
}

struct LowAddrInfo {
    IR::U32 value;
    s32 imm_offset;
};

/// Tries to track the first 32-bits of a global memory instruction
std::optional<LowAddrInfo> TrackLowAddress(IR::Inst* inst) {
    // The first argument is the low level GPU pointer to the global memory instruction
    const IR::Value addr{inst->Arg(0)};
    if (addr.IsImmediate()) {
        // Not much we can do if it's an immediate
        return std::nullopt;
    }
    // This address is expected to either be a PackUint2x32, a IAdd64, or a CompositeConstructU32x2
    IR::Inst* addr_inst{addr.InstRecursive()};
    s32 imm_offset{0};
    if (addr_inst->GetOpcode() == IR::Opcode::IAdd64) {
        // If it's an IAdd64, get the immediate offset it is applying and grab the address
        // instruction. This expects for the instruction to be canonicalized having the address on
        // the first argument and the immediate offset on the second one.
        const IR::U64 imm_offset_value{addr_inst->Arg(1)};
        if (!imm_offset_value.IsImmediate()) {
            return std::nullopt;
        }
        imm_offset = static_cast<s32>(static_cast<s64>(imm_offset_value.U64()));
        const IR::U64 iadd_addr{addr_inst->Arg(0)};
        if (iadd_addr.IsImmediate()) {
            return std::nullopt;
        }
        addr_inst = iadd_addr.InstRecursive();
    }
    // With IAdd64 handled, now PackUint2x32 is expected
    if (addr_inst->GetOpcode() == IR::Opcode::PackUint2x32) {
        // PackUint2x32 is expected to be generated from a vector
        const IR::Value vector{addr_inst->Arg(0)};
        if (vector.IsImmediate()) {
            return std::nullopt;
        }
        addr_inst = vector.InstRecursive();
    }
    // The vector is expected to be a CompositeConstructU32x2
    if (addr_inst->GetOpcode() != IR::Opcode::CompositeConstructU32x2) {
        return std::nullopt;
    }
    // Grab the first argument from the CompositeConstructU32x2, this is the low address.
    return LowAddrInfo{
        .value{IR::U32{addr_inst->Arg(0)}},
        .imm_offset = imm_offset,
    };
}

/// Tries to track the storage buffer address used by a global memory instruction
std::optional<StorageBufferAddr> Track(const IR::Value& value, const Bias* bias) {
    const auto pred{[bias](const IR::Inst* inst) -> std::optional<StorageBufferAddr> {
        if (inst->GetOpcode() != IR::Opcode::GetCbufU32 &&
            inst->GetOpcode() != IR::Opcode::GetCbufU32x2) {
            return std::nullopt;
        }
        const IR::Value index{inst->Arg(0)};
        const IR::Value offset{inst->Arg(1)};
        if (!index.IsImmediate()) {
            // Definitely not a storage buffer if it's read from a
            // non-immediate index
            return std::nullopt;
        }
        if (!offset.IsImmediate()) {
            // TODO: Support SSBO arrays
            return std::nullopt;
        }
        const StorageBufferAddr storage_buffer{
            .index = index.U32(),
            .offset = offset.U32(),
        };
        const u32 alignment{bias ? bias->alignment : 8U};
        if (!Common::IsAligned(storage_buffer.offset, alignment)) {
            // The SSBO pointer has to be aligned
            return std::nullopt;
        }
        if (bias && !MeetsBias(storage_buffer, *bias)) {
            // We have to blacklist some addresses in case we wrongly
            // point to them
            return std::nullopt;
        }
        return storage_buffer;
    }};
    return BreadthFirstSearch(value, pred);
}

/// Collects the storage buffer used by a global memory instruction and the instruction itself
void CollectStorageBuffers(IR::Block& block, IR::Inst& inst, StorageInfo& info) {
    // NVN puts storage buffers in a specific range, we have to bias towards these addresses to
    // avoid getting false positives
    static constexpr Bias nvn_bias{
        .index = 0,
-       .offset_begin = 0x110,
-       .offset_end = 0x610,
-       .alignment = 16,
+       .offset_begin = 0x100, // Adjusted start for broader range
+       .offset_end = 0x700,   // Adjusted end for broader range
+       .alignment = 16,       // You may adjust alignment based on the platform (e.g., 32)
    };
    // Track the low address of the instruction
    const std::optional<LowAddrInfo> low_addr_info{TrackLowAddress(&inst)};
    if (!low_addr_info) {
        // Failed to track the low address, use NVN fallbacks
        return;
    }
    // First try to find storage buffers in the NVN address
    const IR::U32 low_addr{low_addr_info->value};
    std::optional<StorageBufferAddr> storage_buffer{Track(low_addr, &nvn_bias)};
    if (!storage_buffer) {
        // If it fails, track without a bias
        storage_buffer = Track(low_addr, nullptr);
        if (!storage_buffer) {
            // If that also fails, use NVN fallbacks
            LOG_WARNING(Shader, "Storage buffer failed to track, using global memory fallbacks");
            return;
        }
        LOG_WARNING(Shader, "Storage buffer tracked without bias, index {} offset {}",
                    storage_buffer->index, storage_buffer->offset);
    }
    // Collect storage buffer and the instruction
    if (IsGlobalMemoryWrite(inst)) {
        info.writes.insert(*storage_buffer);
    }
    info.set.insert(*storage_buffer);
    info.to_replace.push_back(StorageInst{
        .storage_buffer{*storage_buffer},
        .inst = &inst,
        .block = &block,
    });
}

/// Returns the offset in indices (not bytes) for an equivalent storage instruction
IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer, u32 alignment) {
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    IR::U32 offset;
    if (const std::optional<LowAddrInfo> low_addr{TrackLowAddress(&inst)}) {
        offset = low_addr->value;
        if (low_addr->imm_offset != 0) {
            offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
        }
    } else {
        offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
    }
    // Subtract the least significant 32 bits from the guest offset. The result is the storage
    // buffer offset in bytes.
    IR::U32 low_cbuf{ir.GetCbuf(ir.Imm32(buffer.index), ir.Imm32(buffer.offset))};

    // Align the offset base to match the host alignment requirements
    low_cbuf = ir.BitwiseAnd(low_cbuf, ir.Imm32(~(alignment - 1U)));
    return ir.ISub(offset, low_cbuf);
}

/// Replace a global memory load instruction with its storage buffer equivalent
void ReplaceLoad(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                 const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::Value value{&*block.PrependNewInst(it, new_opcode, {storage_index, offset})};
    inst.ReplaceUsesWith(value);
}

/// Replace a global memory write instruction with its storage buffer equivalent
void ReplaceWrite(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                  const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)});
    inst.Invalidate();
}

/// Replace an atomic operation on global memory instruction with its storage buffer equivalent
void ReplaceAtomic(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
                   const IR::U32& offset) {
    const IR::Opcode new_opcode{GlobalToStorage(inst.GetOpcode())};
    const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::Value value{
        &*block.PrependNewInst(it, new_opcode, {storage_index, offset, inst.Arg(1)})};
    inst.ReplaceUsesWith(value);
}

/// Replace a global memory instruction with its storage buffer equivalent
void Replace(IR::Block& block, IR::Inst& inst, const IR::U32& storage_index,
             const IR::U32& offset) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::LoadGlobalS8:
    case IR::Opcode::LoadGlobalU8:
    case IR::Opcode::LoadGlobalS16:
    case IR::Opcode::LoadGlobalU16:
    case IR::Opcode::LoadGlobal32:
    case IR::Opcode::LoadGlobal64:
    case IR::Opcode::LoadGlobal128:
        return ReplaceLoad(block, inst, storage_index, offset);
    case IR::Opcode::WriteGlobalS8:
    case IR::Opcode::WriteGlobalU8:
    case IR::Opcode::WriteGlobalS16:
    case IR::Opcode::WriteGlobalU16:
    case IR::Opcode::WriteGlobal32:
    case IR::Opcode::WriteGlobal64:
    case IR::Opcode::WriteGlobal128:
        return ReplaceWrite(block, inst, storage_index, offset);
    case IR::Opcode::GlobalAtomicIAdd32:
    case IR::Opcode::GlobalAtomicSMin32:
    case IR::Opcode::GlobalAtomicUMin32:
    case IR::Opcode::GlobalAtomicSMax32:
    case IR::Opcode::GlobalAtomicUMax32:
    case IR::Opcode::GlobalAtomicInc32:
    case IR::Opcode::GlobalAtomicDec32:
    case IR::Opcode::GlobalAtomicAnd32:
    case IR::Opcode::GlobalAtomicOr32:
    case IR::Opcode::GlobalAtomicXor32:
    case IR::Opcode::GlobalAtomicExchange32:
    case IR::Opcode::GlobalAtomicIAdd64:
    case IR::Opcode::GlobalAtomicSMin64:
    case IR::Opcode::GlobalAtomicUMin64:
    case IR::Opcode::GlobalAtomicSMax64:
    case IR::Opcode::GlobalAtomicUMax64:
    case IR::Opcode::GlobalAtomicAnd64:
    case IR::Opcode::GlobalAtomicOr64:
    case IR::Opcode::GlobalAtomicXor64:
    case IR::Opcode::GlobalAtomicExchange64:
    case IR::Opcode::GlobalAtomicIAdd32x2:
    case IR::Opcode::GlobalAtomicSMin32x2:
    case IR::Opcode::GlobalAtomicUMin32x2:
    case IR::Opcode::GlobalAtomicSMax32x2:
    case IR::Opcode::GlobalAtomicUMax32x2:
    case IR::Opcode::GlobalAtomicAnd32x2:
    case IR::Opcode::GlobalAtomicOr32x2:
    case IR::Opcode::GlobalAtomicXor32x2:
    case IR::Opcode::GlobalAtomicExchange32x2:
    case IR::Opcode::GlobalAtomicAddF32:
    case IR::Opcode::GlobalAtomicAddF16x2:
    case IR::Opcode::GlobalAtomicAddF32x2:
    case IR::Opcode::GlobalAtomicMinF16x2:
    case IR::Opcode::GlobalAtomicMinF32x2:
    case IR::Opcode::GlobalAtomicMaxF16x2:
    case IR::Opcode::GlobalAtomicMaxF32x2:
        return ReplaceAtomic(block, inst, storage_index, offset);
    default:
        throw InvalidArgument("Invalid global memory opcode {}", inst.GetOpcode());
    }
}
} // Anonymous namespace

void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateInfo& host_info) {
@@ -532,11 +532,11 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
    }
    for (const StorageBufferAddr& storage_buffer : info.set) {
        program.info.storage_buffers_descriptors.push_back({
            .cbuf_index = storage_buffer.index,
            .cbuf_offset = storage_buffer.offset,
            .count = 1,
            .is_written = info.writes.contains(storage_buffer),
        });
    }
    for (const StorageInst& storage_inst : info.to_replace) {
        const StorageBufferAddr storage_buffer{storage_inst.storage_buffer};
@@ -545,7 +545,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program, const HostTranslateIn
        IR::Block* const block{storage_inst.block};
        IR::Inst* const inst{storage_inst.inst};
        const IR::U32 offset{
            StorageOffset(*block, *inst, storage_buffer, host_info.min_ssbo_alignment)};
        Replace(*block, *inst, index, offset);
    }
}